################################################################################
# This script merges the transcribed audit files into a single standardized
#   dataset for analysis.
#
#
# The individual state files are not necessarily standardized with one another,
#   so in this file we not only merge them to enforce that they cover the same
#   variables, but we also enforce standardization of features like candidate
#   names.
#
#
# written by sbaltz at mit, partially based on code by kguo919 at mit
#   first edited june 2022
#   last edited march 2025
################################################################################
import pandas as pd
import numpy as np
import datetime


################################################################################
# Global variables
################################################################################
VERBOSE = True
SAVE_DIR = "./"
LOG_FILE = SAVE_DIR+"../data_for_analysis/merging_log.txt"
BIG_SEP = '\n' + '-' * 80 + '\n'
LITTLE_SEP = '\n' + '-' * 40 + '\n'

#Initialize merged dataset with explicit column names and types
merged = pd.DataFrame({'state':             pd.Series(dtype='str'),
                       'county':            pd.Series(dtype='str'),
                       'township':          pd.Series(dtype='str'),
                       'district':          pd.Series(dtype='str'),
                       'precinct':          pd.Series(dtype='str'),
                       'division':          pd.Series(dtype='str'),
                       'office':            pd.Series(dtype='str'),
                       'office_cat':        pd.Series(dtype='str'),
                       'candidate':         pd.Series(dtype='str'),
                       'party':             pd.Series(dtype='str'),
                       'original':          pd.Series(dtype='int'),
                       'audited':           pd.Series(dtype='int'),
                       'difference':        pd.Series(dtype='int'),
                       'ballots':           pd.Series(dtype='int'),
                       'issues':            pd.Series(dtype='int'),
                       'mode':              pd.Series(dtype='str'),
                       'method':            pd.Series(dtype='str')
})

#Initialize the log file
with open(LOG_FILE, "w") as f:
    f.write(f"Dataset merging report. Run at {datetime.datetime.now()} \n \n")
f.close()


###############################################################################
# Global functions
###############################################################################

def AutoStandardize(df):
    """
    We insist on some consistent standardization for any new dataframe, e.g.
    Candidates and offices:
       uppercase
       no periods
    """
    varsToCleanOverall = ["candidate", "office", "county"]
    #Drop any variables that don't appear in this dataframe (sometimes they
    # will legitimately not appear, in other cases the absence will be flagged
    # by the column comparison code)
    varsToClean = []
    for var in varsToCleanOverall:
        if var in df.columns:
            varsToClean.append(var)
    for var in varsToClean:
        df[var] = df[var].str.upper()
        df[var] = df[var].str.replace(".", "", regex=False)
    return(df)

def ReplaceInCol(df, replacements, idCol, targetCol):
    """
    Given a dictionary of replacements to make, apply them to a column.
    idCol specifies the value for identifying indices, while targetCol is the
    place to make replacements. Replace values in one column by setting
    idCol = targetCol.
    """
    for r in replacements.keys():
        df.loc[df[idCol] == r, targetCol] = replacements[r]
    return(df)

def CheckStandards(new, merged, officesToCheck, LOG_FILE):
    """
    Function that reports on the status of standardizing the data
    """
    CheckCandsInOffice(new, officesToCheck, LOG_FILE)

def CheckOfficeNames(new, LOG_FILE):
    """
    Print the unique list of office names
    """
    with open(LOG_FILE, "a+") as f:
        f.write(f'Unique offices in new dataframe: {new["office"].unique()}\n\n')
    f.close()

def CheckCandsInOffice(new, offices, LOG_FILE):
    """
    Check candidates in offices (given offices, a list of offices)
    """
    with open(LOG_FILE, "a+") as f:
        f.write(LITTLE_SEP + 'OFFICES \n')
    f.close()
    CheckOfficeNames(new, LOG_FILE)
    for office in offices:
        #Sorted and unique candidates for the office under consideration
        cands = new.loc[new["office"] == office, "candidate"]
        cands = np.sort(cands.unique())
        with open(LOG_FILE, "a+") as f:
            f.write(f'Unique cands for {office}: {cands} \n\n')
        f.close()

def CheckNames(new, merged, LOG_FILE):
    """
    Compare the variable names between the new dataset and the merged dataset
    """
    newNotOld = set(new.head()).difference(set(merged.head()))
    allNotNew = set(merged.head()).difference(set(new.head()))
    with open(LOG_FILE, "a+") as f:
        f.write(f"COLUMN NAMES in NEW but not in merged: {newNotOld} \n\n")
        f.write(f"COLUMN NAMES in MERGED but not in new: {allNotNew} \n\n")
    f.close()

def MergeData(new, merged, VERBOSE):
    """
    Join explicitly on column names
    """
    merged = pd.concat([merged, new],
                       join = 'outer',
                       axis = 0,
                       sort=False)
    return(merged)

def AssignBaseOfficeCats(df):
    """
    This function assigns a first pass to office categories. Crucially, this
    is just a first pass, with many plausible fail states (for example, an
    office with the label 'COUNTY JUDGE CLERK' will receive two consecutive
    labels). Every office needs to be visually checked after this function is
    run, and any errors manually corrected.
    """
    office_cats = {
                   'US PRESIDENT': 'US PRESIDENT',
                   'US SENATE': 'US SENATE',
                   'US HOUSE': 'US HOUSE',
                   'STATE SENATE': 'STATE SENATE',
                   'STATE HOUSE': 'STATE HOUSE',
                   'ALL': 'MULTI'
                  }
    for office in office_cats.keys():
        df.loc[df.office == office, 'office_cat'] = office_cats[office]
    df.loc[df.office.str.contains('REFERENDUM'),'office_cat'] = 'REFERENDUM'
    df.loc[df.office.str.contains('MEASURE'),'office_cat'] = 'REFERENDUM'
    df.loc[df.office.str.contains('PROPOSITION'),'office_cat'] = 'REFERENDUM'
    df.loc[df.office.str.contains('INITIATIVE'),'office_cat'] = 'REFERENDUM'
    df.loc[df.office.str.contains('QUESTION'),'office_cat'] = 'REFERENDUM'
    df.loc[df.office.str.contains('COURT'),'office_cat'] = 'JUDICIAL'
    df.loc[df.office.str.contains('JUDGE'),'office_cat'] = 'JUDICIAL'
    df.loc[df.office.str.contains('ATTORNEY'),'office_cat'] = 'JUDICIAL'
    df.loc[df.office.str.contains('COUNTY COMMISSION'),'office_cat'] = 'COUNTY' 
    df.loc[df.office.str.contains('EDUCATION'),'office_cat'] = 'EDUCATION'
    df.loc[df.office.str.contains('SCHOOL'),'office_cat'] = 'EDUCATION'
    df.loc[df.office.str.contains('COLLEGE'),'office_cat'] = 'EDUCATION'
    df.loc[df.office.str.contains('MAYOR'),'office_cat'] = 'CITY'
    df.loc[df.office.str.contains('CITY COUNCIL'),'office_cat'] = 'CITY'
    df.loc[df.office.str.contains('CITY CLERK'),'office_cat'] = 'CITY'
    df.loc[df.office.str.contains('CITY COMMISSIONER'),
                                  'office_cat'] = 'CITY'
    df.loc[df.office.str.contains('TOWN COUNCIL'),'office_cat'] = 'CITY'
    df.loc[df.office.str.contains('SHERIFF'),'office_cat'] = 'ENFORCEMENT'
    df.loc[df.office.str.contains('ELECTIONS'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('SERVICES'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('FIRE'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('SANITARY'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('RECREATION'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('AIRPORT'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('WATER'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('DEVELOPMENT'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('GOVERNOR'),'office_cat'] = 'GOVERNOR'
    df.loc[df.office.str.contains('HOSPITAL'),'office_cat'] = 'SERVICES'
    df.loc[df.office.str.contains('CORPORATION COMMISSION'),\
                                  'office_cat'] = 'SERVICES' 
    return(df)

def ReportOfficeCats(new, LOG_FILE):
    theMap = new[['office', 'office_cat']]
    theMap = theMap.drop_duplicates()
    with open(LOG_FILE, "a+") as f:
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None):
            f.write(f'Office cat map:\n {str(theMap)}\n\n')
    f.close()

def ReportParties(df, office, LOG_FILE):
    df = df.loc[df.office == office]
    theMap = df[['candidate','party']]
    theMap = theMap.drop_duplicates(ignore_index=True)
    with open(LOG_FILE, "a+") as f:
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None):
            f.write(f'Candidate party map for {office}:\n {str(theMap)}\n\n')
    f.close()

def ReportDistricts(df, LOG_FILE):
    theMap = df[['office','district']]
    theMap = theMap.drop_duplicates(ignore_index=True)
    theMap = theMap.sort_values(by=['office'])
    with open(LOG_FILE, "a+") as f:
        with pd.option_context('display.max_rows', None,
                               'display.max_columns', None):
            f.write(f'Office district map:\n {str(theMap)}\n\n')
    f.close()

def DeclareState(LOG_FILE, BIG_SEP, state):
    """
    Print current state to the log file with clear delineation
    """
    with open(LOG_FILE, 'a+') as f:
        f.write(BIG_SEP + '- ' + state + BIG_SEP)
    f.close()

def WrapState(LOG_FILE, state):
    with open(LOG_FILE, 'a+') as f:
        f.write(f'\n # Done {state} \n')
    f.close()

def SplitColByStr(df, oldColName, newColName, theString):
    """
    Extract substring from one column and insert it into another
    """
    #Create a new column that contains the substring wherever it appears
    _newCol = df[oldColName].str.extract(theString, expand=True)
    #Find the locations that are not NA
    _replace_locs = [not _ for _ in list(_newCol.isna()[0])]
    #Insert the non-NA values into the column we want to modify
    df.loc[_replace_locs, newColName] = _newCol[_replace_locs][0]
    return(df)

def SwitchNameOrder(df):
    """
    Changes LASTNAME, FIRSTNAME order to FIRSTNAME LASTNAME.
    
    This function does not assume that every candidate is in LASTNAME, FIRSTNAME
    order and it will not affect any names that do not contain a comma. However,
    it does affect every name that has a comma, so the user must first ensure
    that for example no candidate has a name of the form FIRSTNAME LASTNAME, JR.

    All changes take place in local variables, so that the values of the
    candidate column are the only change to the dataframe.
    """
    _firstnames = df['candidate'].str.extract(', (.*)', expand=True)
    _lastnames = df['candidate'].str.extract('(.*),', expand=True)
    _replace_locs = [not _ for _ in list(_firstnames.isna()[0])]
    df.loc[_replace_locs, 'candidate'] = _firstnames[_replace_locs][0] + ' ' +\
                                         _lastnames[_replace_locs][0]
    return(df)


################################################################################
# AK statewide manual
################################################################################
DIR = "../state_data/AK/ready/"
ak = AutoStandardize(pd.read_csv(DIR+"ak.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Alaska')

ak.precinct = ak.precinct.str.replace('\n',' ')

#Check variable names
CheckNames(new = ak,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ak,
               merged = merged,
               officesToCheck = ak["office"].unique(),
               LOG_FILE = LOG_FILE)
#Assign office categories
ak = AssignBaseOfficeCats(ak)
ReportOfficeCats(new = ak, LOG_FILE = LOG_FILE)

merged = MergeData(ak, merged, VERBOSE)

WrapState(LOG_FILE, "Alaska")


################################################################################
# AR statewide manual
################################################################################
DIR = "../state_data/AR/ready/"
ar = AutoStandardize(pd.read_csv(DIR+"ar.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arkansas')

#Standardize variable names
ar = ar.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference"
})

#Check variable names
CheckNames(new = ar,
           merged = merged,
           LOG_FILE = LOG_FILE)

#Clean candidates
replacements = {
                'CL GAMMON': 'C L GAMMON',
                'GORIA LA RIVA': 'GLORIA LA RIVA',
                'PHILL COLLINS': 'PHIL COLLINS',
                'D BLANKENSHIP': 'DON BLANKENSHIP',
                'DONALD TRUMP': 'DONALD J TRUMP',
                'JOSEPH BIDEN': 'JOSEPH R BIDEN',
                'R DE LA FUENTE': 'ROQUE DE LA FUENTE'
               }
ar = ReplaceInCol(df = ar,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

CheckStandards(new = ar,
               merged = merged,
               officesToCheck = ar["office"].unique(),
               LOG_FILE = LOG_FILE)

ar = AssignBaseOfficeCats(ar)
ReportOfficeCats(new = ar, LOG_FILE = LOG_FILE)

merged = MergeData(ar, merged, VERBOSE)

WrapState(LOG_FILE, "Arkansas")


################################################################################
# AZ county audits
################################################################################
DIR = "../state_data/AZ/ready/"

##Coconino County
coco = AutoStandardize(pd.read_csv(DIR+"coconino_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Coconino')
coco = coco.rename(columns = {'type': 'method',
                              'batch': 'division'
                             })

replacements = {
                'PRESIDENTIAL ELECTOR': 'US PRESIDENT',
                'US REPRESENTATIVE, CD 1': 'US HOUSE',
                'STATE REPRESENTATIVE, LD 6': 'STATE HOUSE',
                'PROP 208': 'PROPOSITION 208',
                'CORPORATION COMISSION': 'CORPORATION COMMISSION'
               }
coco = ReplaceInCol(df = coco,
                    replacements = replacements,
                    idCol = 'office',
                    targetCol = 'office'
                   ) 

coco = SwitchNameOrder(coco)

replacements = {
                'JOSEPH BIDEN': 'JOSEPH R BIDEN',
                'DONALD TRUMP': 'DONALD J TRUMP'
               }
coco = ReplaceInCol(df = coco,
                    replacements = replacements,
                    idCol = 'candidate',
                    targetCol = 'candidate'
                   ) 

coco['district'] = ''
coco.loc[coco.office == 'US HOUSE', 'district'] = 1
coco.loc[coco.office == 'STATE HOUSE', 'district'] = 6

coco['party'] = ''
replacements = {
                'JOSEPH R BIDEN': 'DEMOCRAT',
                'DONALD J TRUMP': 'REPUBLICAN',
                'JO JORGENSEN': 'LIBERTARIAN',
                "TOM O'HALLERAN": 'DEMOCRAT',
                'TIFFANY SHEDD': 'REPUBLICAN',
                'ART BABBOTT': 'OTHER',
                'BRENDA BARTON': 'REPUBLICAN',
                'CORAL J EVANS': 'DEMOCRAT',
                'WALTER "WALT" BLACKMAN': 'REPUBLICAN'
               }
coco = ReplaceInCol(df = coco,
                    replacements = replacements,
                    idCol = 'candidate',
                    targetCol = 'party'
                   ) 

CheckNames(new = coco,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = coco,
               merged = merged,
               officesToCheck = coco["office"].unique(),
               LOG_FILE = LOG_FILE)

coco = AssignBaseOfficeCats(coco)
ReportOfficeCats(new = coco, LOG_FILE = LOG_FILE)

merged = MergeData(coco, merged, VERBOSE)
WrapState(LOG_FILE, "Coconino county")

##Greenlee County
gr = AutoStandardize(pd.read_csv(DIR+"greenlee_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Greenlee')
gr = gr.rename(columns = {'type': 'method', 'batch': 'division'})

#Split out party abbreviations
gr = SplitColByStr(df = gr,
                   oldColName = 'candidate',
                   newColName = 'party',
                   theString = '(\(.*)'
                  )
gr.candidate = gr.candidate.str.replace(' \(.*','',regex=True)
gr.candidate = gr.candidate.str.strip()
gr = SwitchNameOrder(gr)

replacements = {
                'COHEN JORGENSEN': 'JO JORGENSEN',
                'HARRIS BIDEN': 'JOSEPH R BIDEN',
                'PENCE TRUMP': 'DONALD J TRUMP',
                "JAMES \'\'JIM\" O\'CONNOR": 'JAMES "JIM" O\'CONNOR',
                'ANNA TOYAR': 'ANNA TOVAR'
               }
gr = ReplaceInCol(df = gr,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                'PRESIDENTIAL ELECTOR': 'US PRESIDENT',
                'US SENATOR': 'US SENATE',
                'STATE REPRESENTATIVE DISTRICT 14': 'STATE HOUSE',
                'CORPORATION COMISSIONER': 'CORPORATION COMMISSION'
               }
gr = ReplaceInCol(df = gr,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

replacements = {
                'JOSEPH BIDEN': 'JOSEPH R BIDEN',
                'DONALD TRUMP': 'DONALD J TRUMP',
                'NA': 'NO' #Note this is not NaN, just OCR misreading No as Na
               }
gr = ReplaceInCol(df = gr,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

gr['district'] = ''
gr.loc[gr.office == 'STATE HOUSE', 'district'] = 14

replacements = {
                '(DEM)': 'DEMOCRAT',
                '(REP)': 'REPUBLICAN'
               }
gr = ReplaceInCol(df = gr,
                  replacements = replacements,
                  idCol = 'party',
                  targetCol = 'party'
                 ) 

CheckNames(new = gr,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = gr,
               merged = merged,
               officesToCheck = gr["office"].unique(),
               LOG_FILE = LOG_FILE)

gr = AssignBaseOfficeCats(gr)
ReportOfficeCats(new = gr, LOG_FILE = LOG_FILE)

merged = MergeData(gr, merged, VERBOSE)
WrapState(LOG_FILE, "Greenlee county")

##Maricopa county
ma = AutoStandardize(pd.read_csv(DIR+"maricopa_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Maricopa')
ma = ma.rename(columns = {'type': 'method', 'batch': 'division'})

replacements = {
                'PRESIDENT': 'US PRESIDENT',
                'US SENATOR': 'US HOUSE', #This mistake is in the original PDF!
                'STATE REPRESENTATIVE': 'STATE SENATE',
                'CORPORATION COMISSIONER': 'CORPORATION COMMISSION'
               }
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

#Split districts
ma = SplitColByStr(df = ma,
                   oldColName = 'candidate',
                   newColName = 'district',
                   theString = '\([C|L]D (.*?)\)'
                  )
ma.candidate = ma.candidate.str.replace(' \(.*','',regex=True)
ma.candidate = ma.candidate.str.strip()
ma = SwitchNameOrder(ma)

replacements = {
                'BIDEN/HARRIS': 'JOSEPH R BIDEN',
                'TRUMP/PENCE': 'DONALD J TRUMP',
                'JORGENSEN/COHEN': 'JO JORGENSEN',
                'PROP 208 - NO': 'NO',
                'PROP 208 - YES': 'YES',
               }
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

ma['party'] = ''
replacements = {
                'JOSEPH R BIDEN': 'DEMOCRAT',
                'DONALD J TRUMP': 'REPUBLICAN',
                'JO JORGENSEN': 'LIBERTARIAN',
                'ANDY BIGGS': 'DEMOCRAT',
                'JOAN GREENE': 'REPUBLICAN',
                'DAVID SCHWEIKERT': 'REPUBLICAN',
                'HIRAL TIPIRNENI': 'DEMOCRAT',
                'RUBEN GALLEGO': 'DEMOCRAT',
                'JOSHUA BARNETT': 'REPUBLICAN',
                "TOM O'HALLERAN": 'DEMOCRAT',
                'TIFFANY SHEDD': 'REPUBLICAN',
                'RAUL GRIJALVA': 'DEMOCRAT',
                'DANIEL WOOD': 'REPUBLICAN',
                'PAUL GOSAR': 'REPUBLICAN',
                'DELINA DISANTO': 'DEMOCRAT',
                'DEBBIE LESKO': 'REPUBLICAN',
                'MICHAEL MUSCATO': 'DEMOCRAT',
                'GREG STANTON': 'DEMOCRAT',
                'DAVE GILES': 'REPUBLICAN',
                'AJLAN "AJ" KURDOGLU': 'DEMOCRAT',
                'CHRISTINE MARSH': 'DEMOCRAT',
                'DAVID LIVINGSTON': 'REPUBLICAN',
                'DOUGLAS ERVIN': 'DEMOCRAT',
                'GARLAND SHREVES': 'REPUBLICAN',
                'GILBERT "GILBERT" CARILLO': 'DEMOCRAT',
                'JAE CHIN': 'REPUBLICAN',
                'JD MESNARD': 'REPUBLICAN',
                'JOHN WILSON': 'REPUBLICAN',
                'JUAN MENDEZ': 'DEMOCRAT',
                'KAREN ELIZABETH FANN': 'REPUBLICAN',
                'KATE BROPHY MCGEE': 'REPUBLICAN',
                'LELA ALSTON': 'DEMOCRAT',
                'LISA OTONDO': 'DEMOCRAT',
                'LYNSEY ROBINSON': 'DEMOCRAT',
                'MARTIN J QUEZADA': 'DEMOCRAT',
                'MICHELLE UGENTI-RITA': 'REPUBLICAN',
                'PAUL BOYER': 'REPUBLICAN',
                'PAUL WEIGEL': 'DEMOCRAT',
                'RAY MICHAELS': 'REPUBLICAN',
                'REBECCA RIOS': 'DEMOCRAT',
                'SARAH TYREE': 'DEMOCRAT',
                'SEAN BOWIE': 'DEMOCRAT',
                'SETH BLATTMAN': 'DEMOCRAT',
                'SUZANNE SHARER': 'REPUBLICAN',
                'TRAVIS ANGRY': 'REPUBLICAN',
                'TYLER PACE': 'REPUBLICAN',
                'WARREN PETERSEN': 'REPUBLICAN'
               }
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'party'
                 ) 

CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)

ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)

merged = MergeData(ma, merged, VERBOSE)
WrapState(LOG_FILE, "Maricopa county")

##Mohave County
mo = AutoStandardize(pd.read_csv(DIR+"mohave_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Mohave')
mo = mo.rename(columns = {'type': 'method', 'batch': 'division'})

replacements = {
                'PRESIDENTIAL ELECTORS': 'US PRESIDENT',
                'CORPORATION COMMISSIONER': 'CORPORATION COMMISSION',
                'US, SENATOR': 'US SENATE',
                'STATE REPRESENTATIVE DIST 7': 'STATE HOUSE',
                'US SENATOR': 'US SENATE',
                'STATE REPRESENTATIVE 01ST 7': 'STATE HOUSE'
               }
mo = ReplaceInCol(df = mo,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

mo = SwitchNameOrder(mo)

replacements = {
                'BIDEN/HARRIS': 'JOSEPH R BIDEN',
                'JORGENSEN/COHEN': 'JO JORGENSEN',
                'MOEN/HARRIS': 'JOSEPH R BIDEN', #OCR mistake
                'TRUMP/PENCE': 'DONALD J TRUMP',
                'O\'CONNOR JAMES "11M"': 'JAMES "JIM" O\'CONNOR',
                'O\'CONNOR JAMES "JIM"': 'JAMES "JIM" O\'CONNOR',
                'O\'CONNOR JAMES \'JIM"': 'JAMES "JIM" O\'CONNOR',
                'O\'CONNOR TAMES "JIM"': 'JAMES "JIM" O\'CONNOR',
                'ARLAND° TELLER': 'ARLANDO TELLER',
                'JAMES "IIM\' PARKS': 'JAMES "JIM" PARKS',
                "MYRON 'MS1E": 'MYRON TSOSIE',
                'MYRON TSCISIE': 'MYRON TSOSIE'
               }
mo = ReplaceInCol(df = mo,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                '(DEM)': 'DEMOCRAT',
                '(REP)': 'REPUBLICAN',
                '(OEM)': 'DEMOCRAT',
                'ADEM)': 'DEMOCRAT'
               }
mo = ReplaceInCol(df = mo,
                  replacements = replacements,
                  idCol = 'party',
                  targetCol = 'party'
                 ) 

CheckNames(new = mo,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = mo,
               merged = merged,
               officesToCheck = mo["office"].unique(),
               LOG_FILE = LOG_FILE)
mo = AssignBaseOfficeCats(mo)
ReportOfficeCats(new = mo, LOG_FILE = LOG_FILE)
merged = MergeData(mo, merged, VERBOSE)
WrapState(LOG_FILE, "Mohave county")

##Navajo county
na = AutoStandardize(pd.read_csv(DIR+"navajo_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Navajo')
na = na.rename(columns = {'type': 'method', 'batch': 'division'})

na = SplitColByStr(df = na,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DIST (.*)'
                   )
na.office = na.office.str.replace('DIST .*','',regex=True)
na.office = na.office.str.strip()

replacements = {
                'U S PRESIDENT': 'US PRESIDENT',
                'U S SENATOR': 'US SENATE',
                'US REP IN CONGRESS': 'US HOUSE',
                'US REP LEG': 'STATE HOUSE',
                'CORP COMMISSIONER': 'CORPORATION COMMISSION',
                'PROP 208': 'PROPOSITION 208'
               }
na = ReplaceInCol(df = na,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )

replacements = {
                'BIDEN/HARRIS': 'JOSEPH R BIDEN',
                'JORGENSEN/COHEN': 'JO JORGENSEN',
                'TRUMP/PENCE': 'DONALD J TRUMP',
               }
na = ReplaceInCol(df = na,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                'JOSEPH R BIDEN': 'DEMOCRAT',
                'DONALD J TRUMP': 'REPUBLICAN',
                'JO JORGENSEN': 'LIBERTARIAN',
                'MARTHA MCSALLY': 'REPUBLICAN',
                'MARK KELLY': 'DEMOCRAT',
                "TOM O'HALLERAN": 'DEMOCRAT',
                'TIFFANY SHEDD': 'REPUBLICAN',
                'ART BABBOTT': 'OTHER',
                'BRENDA BARTON': 'REPUBLICAN',
                'CORAL EVANS': 'DEMOCRAT',
                'WALTER BLACKMAN': 'REPUBLICAN',
                'JAMES PARKS': 'REPUBLICAN',
                'DAVID PEELMAN': 'REPUBLICAN',
                'ARLANDO TELLER': 'DEMOCRAT',
                'MYRON TSOSIE': 'DEMOCRAT'
                }
na = ReplaceInCol(df = na,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'party'
                 )

CheckNames(new = na,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = na,
               merged = merged,
               officesToCheck = na["office"].unique(),
               LOG_FILE = LOG_FILE)
na = AssignBaseOfficeCats(na)
ReportDistricts(df = na, LOG_FILE = LOG_FILE)
for office in na.office.unique():
    ReportParties(df = na, office = office, LOG_FILE = LOG_FILE)
ReportOfficeCats(new = na, LOG_FILE = LOG_FILE)
merged = MergeData(na, merged, VERBOSE)
WrapState(LOG_FILE, "Navajo county")

##Pima county
pi = AutoStandardize(pd.read_csv(DIR+"pima_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Pima')
pi = pi.rename(columns = {'type': 'method', 'batch': 'division'})

pi = SplitColByStr(df = pi,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DISTRICT (.*)'
                   )
pi.office = pi.office.str.replace('DISTRICT .*','',regex=True)
pi.office = pi.office.str.strip()

replacements = {
                'PRESIDENTIAL ELECTOR': 'US PRESIDENT',
                'U S SENATOR': 'US SENATE',
                'CORPORATION COMMISSIONER': 'CORPORATION COMMISSION',
                'STATE SENATOR LEGISLATIVE': 'STATE SENATE'
               }
pi = ReplaceInCol(df = pi,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

pi = SwitchNameOrder(pi)

replacements = {
                'BIDEN / HARRIS': 'JOSEPH R BIDEN',
                'JORGENSEN / COHEN': 'JO JORGENSEN',
                'TRUMP / PENCE': 'DONALD J TRUMP'
               }
pi = ReplaceInCol(df = pi,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                'JOSEPH R BIDEN': 'DEMOCRAT',
                'DONALD J TRUMP': 'REPUBLICAN',
                'JO JORGENSEN': 'LIBERTARIAN',
                'MARTHA MCSALLY': 'REPUBLICAN',
                'MARK KELLY': 'DEMOCRAT',
                'ROSANNA GAVALDÓN': 'DEMOCRAT',
                'MARK WORKMAN': 'REPUBLICAN',
                'LISA OTONDO': 'DEMOCRAT',
                'TRAVIS ANGRY': 'REPUBLICAN',
                'KIRSTEN ENGEL': 'DEMOCRAT',
                'JUSTINE WADSACK': 'REPUBLICAN',
                'JOANNA MENDOZA': 'DEMOCRAT',
                'VENDEN "VINCE" LEACH': 'REPUBLICAN',
                'BOB KARP': 'DEMOCRAT',
                'DAVID GOWAN': 'REPUBLICAN',
                'LISA ONTONDO': 'DEMOCRAT'
               }
pi = ReplaceInCol(df = pi,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'party'
                 ) 

CheckNames(new = pi,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = pi,
               merged = merged,
               officesToCheck = pi["office"].unique(),
               LOG_FILE = LOG_FILE)
pi = AssignBaseOfficeCats(pi)
ReportDistricts(df = pi, LOG_FILE = LOG_FILE)
for office in pi.office.unique():
    ReportParties(df = pi, office = office, LOG_FILE = LOG_FILE)
ReportOfficeCats(new = pi, LOG_FILE = LOG_FILE)
merged = MergeData(pi, merged, VERBOSE)
WrapState(LOG_FILE, "Pima county")


##Santa Cruz
sc = AutoStandardize(pd.read_csv(DIR+"santacruz_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Santa Cruz')
sc = sc.rename(columns = {'type': 'method', 'batch': 'division'})

sc = SplitColByStr(df = sc,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DIST (.*)'
                   )
sc.office = sc.office.str.replace('DIST .*','',regex=True)
sc.office = sc.office.str.strip()

sc = SwitchNameOrder(sc)

replacements = {
                'U S PRESIDENT': 'US PRESIDENT',
                'US REP IN CONGRESS': 'US HOUSE',
                'STATE REP LEG': 'STATE HOUSE',
                'CORP COMMISSIONER': 'CORPORATION COMMISSION'
               }
sc = ReplaceInCol(df = sc,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

replacements = {
                'JOE BIDEN': 'JOSEPH R BIDEN',
                'DALESSANDRO ANDREA': 'ANDREA DALESSANDRO'
               }
sc = ReplaceInCol(df = sc,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                'JOSEPH R BIDEN': 'DEMOCRAT',
                'DONALD J TRUMP': 'REPUBLICAN',
                'JO JORGENSEN': 'LIBERTARIAN',
                'RAÚL GRIJALVA': 'DEMOCRAT',
                'DANIEL WOOD': 'REPUBLICAN',
                'DEBORAH MCEWEN': 'REPUBLICAN',
                'ANDREA DALESSANDRO': 'DEMOCRAT',
                'DANIEL HERNANDEZ JR': 'DEMOCRAT'
               }
sc = ReplaceInCol(df = sc,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'party'
                 ) 

CheckNames(new = sc,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = sc,
               merged = merged,
               officesToCheck = sc["office"].unique(),
               LOG_FILE = LOG_FILE)
sc = AssignBaseOfficeCats(sc)
ReportDistricts(df = sc, LOG_FILE = LOG_FILE)
for office in sc.office.unique():
    ReportParties(df = sc, office = office, LOG_FILE = LOG_FILE)
ReportOfficeCats(new = sc, LOG_FILE = LOG_FILE)
merged = MergeData(sc, merged, VERBOSE)
WrapState(LOG_FILE, "Santa Cruz county")


##Yavapai county
ya = AutoStandardize(pd.read_csv(DIR+"yavapai_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Arizona: Yavapai')
ya = ya.rename(columns = {'type': 'method', 'batch': 'division'})

ya = SplitColByStr(df = ya,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DISTRICT (.*)'
                   )
ya.office = ya.office.str.replace('DISTRICT .*','',regex=True)
ya.office = ya.office.str.replace(' -','',regex=False)
ya.office = ya.office.str.strip()

ya = SplitColByStr(df = ya,
                   oldColName = 'candidate',
                   newColName = 'party',
                   theString = '(\(.*)'
                  )
ya.candidate = ya.candidate.str.replace(' \(.*','',regex=True)
ya.candidate = ya.candidate.str.strip()
ya = SwitchNameOrder(ya)

replacements = {
                'PRESIDENTIAL ELECTOR': 'US PRESIDENT',
                'CORPORATION COMMISSIONER': 'CORPORATION COMMISSION',
                'PROPOSITION # 208': 'PROPOSITION 208',
                'US REPRESENTATIVE': 'US HOUSE',
                'STATE SENATOR': 'STATE SENATE'
               }
ya = ReplaceInCol(df = ya,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 ) 

replacements = {
                'BIDEN / HARRIS': 'JOSEPH R BIDEN',
                'JORGENSEN / COHEN': 'JO JORGENSEN',
                'TRUMP / PENCE': 'DONALD J TRUMP'
               }
ya = ReplaceInCol(df = ya,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

replacements = {
                '(REP)': 'REPUBLICAN',
                '(DEM)': 'DEMOCRAT',
                '(LBT)': 'LIBERTARIAN'
               }
ya = ReplaceInCol(df = ya,
                  replacements = replacements,
                  idCol = 'party',
                  targetCol = 'party'
                 ) 

CheckNames(new = ya,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ya,
               merged = merged,
               officesToCheck = ya["office"].unique(),
               LOG_FILE = LOG_FILE)
ya = AssignBaseOfficeCats(ya)
ReportDistricts(df = ya, LOG_FILE = LOG_FILE)
for office in ya.office.unique():
    ReportParties(df = ya, office = office, LOG_FILE = LOG_FILE)
ReportOfficeCats(new = ya, LOG_FILE = LOG_FILE)
merged = MergeData(ya, merged, VERBOSE)
WrapState(LOG_FILE, "Yavapai county")


################################################################################
# CA substate RLAs
################################################################################
DIR = "../state_data/CA/ready/"
inyo = AutoStandardize(pd.read_csv(DIR+"inyo_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Inyo RLA')

#Make the vote division the tabulator, batch, and container combined
inyo["division"] = inyo["container"].map(str) + ["_"] + \
                   inyo["tabulator"].map(str) + ["_"] + \
                   inyo["batch"].map(str)
#Standardize variable names
inyo = inyo.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
})
#Generate the simple difference variable
inyo["difference"] = inyo["audited"] - inyo["original"]
#Drop any extra variables
inyo = inyo.drop(["container", "tabulator", "batch", "margin_diff", 
                  "literal_diff", "date"], axis = 1)
#Additional information
inyo["method"] = "RLA"

#Check variable names
CheckNames(new = inyo,
           merged = merged,
           LOG_FILE = LOG_FILE)

CheckStandards(new = inyo,
               merged = merged,
               officesToCheck = inyo["office"].unique(),
               LOG_FILE = LOG_FILE)

inyo = AssignBaseOfficeCats(inyo)
cats = {
        'HEALTH CARE DIRECTOR 1': 'SERVICES',
        'BISHOP CITY TREASURER': 'SERVICES',
        'COUNTY SUPERVISOR 4': 'COUNTY'
        }
for c in cats:
    inyo.loc[inyo.office == c, 'office_cat'] = cats[c]
ReportOfficeCats(new = inyo, LOG_FILE = LOG_FILE)

merged = MergeData(inyo, merged, VERBOSE)

WrapState(LOG_FILE, "Inyo county")

#Now El Dorado, which has nearly the same format
ed = AutoStandardize(pd.read_csv(DIR+"eldorado_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: El Dorado RLA')

#Make the vote bundle the tabulator, batch, and container combined
ed["division"] = ed["container"].map(str) + ["-"] + \
                 ed["tabulator"].map(str) + ["-"] + \
                 ed["batch"].map(str)
#Standardize variable names
ed = ed.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
})
#Generate the simple difference variable
ed["difference"] = ed["audited"] - ed["original"]
#Drop any extra variables
ed = ed.drop(["container", "tabulator", "batch", "margin_diff", 
                  "literal_diff", "date"], axis = 1)
#Additional information
ed["method"] = "RLA"

#Check variable names
CheckNames(new = ed,
           merged = merged,
           LOG_FILE = LOG_FILE)

#Clean candidates
replacements = {
                'JOHN  HIDAHL': 'JOHN HIDAHL',
               }
ed = ReplaceInCol(df = ed,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 ) 

CheckStandards(new = ed,
               merged = merged,
               officesToCheck = ed["office"].unique(),
               LOG_FILE = LOG_FILE)

cats = {
        'GEORGETOWN DIVIDE UTILITY DIRECTOR': 'SERVICES',
        'EL DORADO IRRIGATION 1': 'SERVICES',
        'SOUTH TAHOE UTILITY DIRECTOR': 'SERVICES',
        'COUNTY SUPERVISOR': 'COUNTY',
        'EL DORADO HILLS DIRECTOR': 'EDUCATION',
        'EL DORADO IRRIGATION 3': 'COUNTY'
        }
for c in cats:
    ed.loc[ed.office == c, 'office_cat'] = cats[c]
ed = AssignBaseOfficeCats(ed)
ReportOfficeCats(new = ed, LOG_FILE = LOG_FILE)

merged = MergeData(ed, merged, VERBOSE)


################################################################################
# CA statewide manual
################################################################################
#Same directory as California county-level RLAs

#Humboldt county
hu = AutoStandardize(pd.read_csv(DIR+"humboldt_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Humboldt manual')
hu = hu.rename(columns = {
    "type": "method"
})
#Standardize some offices and identifiers
hu.loc[hu.office.str.contains('DISTRICT 2'), 'district'] = 2
replacements = {
                'PRESIDENT & VICE PRESIDENT': 'US PRESIDENT',
                'USRE RESENTATIVE, DISTRICT 2': 'US HOUSE',
                'STATE ASSEMBLY MEMBER, DISTRICT 2': 'STATE HOUSE',
                'PRO OSITION 23': 'PROPOSITION 23',
                'PRO °SALON 25': 'PROPOSITION 25'
               }
hu = ReplaceInCol(df = hu,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
#Clean candidate names
replacements = {
                'BIDEN & HARRIS': 'JOSEPH R BIDEN',
                'GUERRA & WEST': 'ROQUE DE LA FUENTE',
                'HAWKINS & WALKER': 'HOWIE HAWKINS',
                'JORGENSEN & COHEN': 'JO JORGENSON',
                'LA RIVA & FREEMAN': 'GLORIA LA RIVA',
                'QUALIFIED WRITE-INS': 'WRITE-IN',
                'TRUMP & PENCE': 'DONALD J TRUMP'
               }
hu = ReplaceInCol(df = hu,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = hu,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = hu,
               merged = merged,
               officesToCheck = hu["office"].unique(),
               LOG_FILE = LOG_FILE)
cats = {'HUMBOLDT COUNTY SUPERVISOR, DISTRICT 2': 'COUNTY'}
for c in cats:
    hu.loc[hu.office == c, 'office_cat'] = cats[c]
hu = AssignBaseOfficeCats(hu)
ReportOfficeCats(new = hu, LOG_FILE = LOG_FILE)
merged = MergeData(hu, merged, VERBOSE)

#Los Angeles county
la = AutoStandardize(pd.read_csv(DIR+"la_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Los Angeles manual')
la = la.rename(columns = {
    "subset": "division"
})
CheckNames(new = la,
           merged = merged,
           LOG_FILE = LOG_FILE)
la = AssignBaseOfficeCats(la)
ReportOfficeCats(new = la, LOG_FILE = LOG_FILE)
merged = MergeData(la, merged, VERBOSE)

#Mariposa county
ma = AutoStandardize(pd.read_csv(DIR+"mariposa_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Mariposa manual')
ma = ma.rename(columns = {
    "type": "method"
})
#Clean districts
ma.loc[ma.office.str.contains('DIST 5'), 'district'] = 5
ma.loc[ma.office.str.contains('DIST 4'), 'district'] = 4
#Clean offices
replacements = {
                'PRESIDENT': 'US PRESIDENT',
                '3TATE ASSEMB   DIST 5': 'STATE HOUSE',
                'US REP DIST 4': 'US HOUSE',
                'PROO 14': 'PROP 14',
                'PROO 16': 'PROP 16'
               }
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
#Clean candidates
replacements = {
                '-JOSEPH R, BIDEN': 'JOSEPH R BIDEN',
                'GLORIA LA RIVE': 'GLORIA LA RIVA',
                'GUERRA': 'ROQUE DE LA FUENTE'
               }
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ma.loc[ma.office.str.contains('PROP'), 'office_cat'] = 'REFERENDUM'
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)

#Modoc county
mo = AutoStandardize(pd.read_csv(DIR+"modoc_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Modoc manual')
mo = mo.rename(columns = {
    "type": "method"
})
mo.office = mo.office.str.replace('LCC DISTRICT','LCC',regex=True)
mo.office = mo.office.str.replace('1A5\(','1A5 (',regex=True)
mo = SplitColByStr(df = mo,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DISTRICT (.*)'
                   )
mo.office = mo.office.str.replace('( DISTRICT .*)','',regex=True)
replacements = {
                'PRESIDENT & VICE PRESIDENT': 'US PRESIDENT',
                'US REPRESENTATIVE': 'US HOUSE',
                'STATE SENATOR': 'STATE SENATE',
                'MEMBER OF THE STATE ASSEMBLY': 'STATE HOUSE'
               }
mo = ReplaceInCol(df = mo,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
replacements = {
        'DEM  JOSEPH R BIDEN/KAMALA D HARRIS': 'JOSEPH R BIDEN',
        'GRN    HOWIE HAWKINS/ANGELA NICOLE WALKER': 'HOWIE HAWKINS',
        'HOWIE HAWKINS/ANGELA NICOLE WALKER': 'HOWIE HAWKINS',
        'JO JORGENSEN/JEREMY "SPIKE" COHEN': 'JO JORGENSEN',
        'OVER VOTES:': 'OVERVOTES',
        'P&F    GLORIA LA RIVA/ SUN IL FREEMAN': 'GLORIA LA RIVA',
        'P&F    GLORIA LA RIVAL SUNIL FREEMAN': 'GLORIA LA RIVA',
        'REP  DONALD J TRUMP/MICHAEL R PENCE': 'DONALD J TRUMP',
        'ROQUE "ROCKY" DE LA FUENTE/KANYE OMARI WEST': 'ROQUE DE LA FUENTE',
        'ROQUE \"ROCKY\'\' DE LA FUENTE/KANYE OMARI WEST': 'ROQUE DE LA FUENTE',
        'UNDER VOTES': 'UNDERVOTES',
        'UNDER VOTES:': 'UNDERVOTES',
        'WRITE-INS': 'WRITE-IN',
        'GLORIA LA RIVA/ SUNIL FREEMAN': 'GLORIA LA RIVA',
        'JOSEPH R BIDEN/KAMALA D HARRIS': 'JOSEPH R BIDEN',
        'U-RIDER VOTES:': 'UNDERVOTES',
        'DONALD J TRUMP/MICHAEL R PENCE': 'DONALD J TRUMP',
        'CORT CORTE1': 'CORT CORTEZ'
               }
mo = ReplaceInCol(df = mo,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
mo.precinct = mo.precinct.str.upper()
mo['mode'] = ''
mo.loc[mo.precinct.str.contains('POLL VOTERS'), 'mode'] = 'IN-PERSON'
mo.loc[mo.precinct.str.contains('MAIL'), 'mode'] = 'MAIL'
mo.precinct = mo.precinct.str.replace('PRECINCT: ','')
mo.precinct = mo.precinct.str.replace('(\(.*)','',regex=True)

CheckNames(new = mo,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = mo,
               merged = merged,
               officesToCheck = mo["office"].unique(),
               LOG_FILE = LOG_FILE)
mo = AssignBaseOfficeCats(mo)
mo.loc[mo.office.str.contains('BOARD MEMBER'), 'office_cat'] = 'EDUCATION'
ReportOfficeCats(new = mo, LOG_FILE = LOG_FILE)
merged = MergeData(mo, merged, VERBOSE)

#San Bernardino county
sb = AutoStandardize(pd.read_csv(DIR+"san_bernardino_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: San Bernardino manual')
merged = MergeData(sb, merged, VERBOSE)
WrapState(LOG_FILE, "San Bernardino")

#San Diego county
sd = AutoStandardize(pd.read_csv(DIR+"san_diego_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: San Diego manual')
merged = MergeData(sd, merged, VERBOSE)
WrapState(LOG_FILE, "San Diego")

#Santa Barbara county
stb = AutoStandardize(pd.read_csv(DIR+"santa_barbara_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Santa Barbara manual')
stb = stb.rename(columns = {
    "type": "method"
})
stb['district'] = ''
stb.loc[stb.office.str.contains('24TH DIST'), 'district'] = 24
stb.loc[stb.office.str.contains('24TH OIST'), 'district'] = 24
stb.loc[stb.office.str.contains('24TH 01ST'), 'district'] = 24
stb.loc[stb.office.str.contains('19TH DISTRICT'), 'district'] = 19
stb.loc[stb.office.str.contains('35TH DISTRICT'), 'district'] = 35
stb.loc[stb.office.str.contains('37TH'), 'district'] = 37
stb.loc[stb.office.str.contains('DIST #1'), 'district'] = 1
stb.loc[stb.office.str.contains('0IST #2'), 'district'] = 2
stb.loc[stb.office.str.contains('01ST #2'), 'district'] = 2
stb.loc[stb.office.str.contains('0IST *4', regex=False), 'district'] = 4
stb.loc[stb.office.str.contains('01ST *4', regex=False), 'district'] = 4
stb.loc[stb.office.str.contains('DIST #5'), 'district'] = 5
stb.loc[stb.office.str.contains('#11'), 'district'] = 11
stb.loc[stb.office.str.contains('#1$',regex=True), 'district'] = 1
#TA is Trustee Area
stb = SplitColByStr(df = stb,
                    oldColName = 'office',
                    newColName = 'district',
                    theString = 'TA #(.)'
                    )
stb.office = stb.office.str.replace('-','', regex=False)
stb.office = stb.office.str.replace('_','', regex=False)
stb.office = stb.office.str.replace('0IST','DIST', regex=False)
stb.office = stb.office.str.replace('01ST','DIST', regex=False)
stb.office = stb.office.str.replace('*','#', regex=False)
stb.office = stb.office.str.replace('#11','', regex=False)
stb.office = stb.office.str.replace('(TA #.$)', 'TRUSTEE', regex=True)
stb.office = stb.office.str.replace('(DIST #.$)', '', regex=True)
stb.office = stb.office.str.replace('(,$)', '', regex=True)
stb.office = stb.office.str.replace('(DIST )','DISTRICT ',regex=True)
stb.office = stb.office.str.replace('(DIST$)','DISTRICT',regex=True)
stb.office = stb.office.str.replace('(EDUCATION #1$)','EDUCATION',regex=True)
stb.office = stb.office.str.replace('(MEMBERRECALL)','MEMBER RECALL',regex=True)
stb.office = stb.office.str.replace('(EDUCATIONTRUSTEE)',
                                    'EDUCATION TRUSTEE',regex=True)
stb.office = stb.office.str.replace(
                'COMMUNITY COLLEGE DISTRICT TRUSTEE',
                'COMMUNITY COLLEGE TRUSTEE',regex=True)
stb.loc[stb.office.str.contains('US REPRESENTATIVE'), 'office'] = 'US HOUSE'
stb.loc[stb.office.str.contains('STATE SENATOR'), 'office'] = 'STATE SENATE'
stb.loc[stb.office.str.contains('STATE ASSEMBLY'), 'office'] = 'STATE HOUSE'
stb.office = stb.office.str.replace('  ',' ')
stb.office = stb.office.str.strip()

stb.loc[stb.office == 'QUALIFIED WRITEIN PRESIDEN', 'candidate'] = 'WRITE-IN'

replacements = {
                'PRESIDENT': 'US PRESIDENT',
                'QUALIFIED WRITEIN PRESIDEN': 'US PRESIDENT',
                'MEASURE 02020': 'MEASURE O2020',
                'SANTA BARBARA UNIFIED SCHOOL DISTRICT':
                    'SANTA BARBARA UNIFIED SCHOOL DISTRICT TRUSTEE',
                'COUNTY BOARD OF EDUCATION':
                    'COUNTY BOARD OF EDUCATION TRUSTEE',
                'SANTA YNEZ VALLEY UNION HIGH SCHOOL DISTRICT':
                    'SANTA YNEZ VALLEY UNION HIGH',
                'CARPINTERIA CITY COUNCIL MEMBER': 
                    'CITY OF CARPINTERIA CITY COUNCIL MEMBER',
                'CARPINTERIA CITY COUNCIL':
                    'CITY OF CARPINTERIA CITY COUNCIL MEMBER',
                'SANTA BARBARA COMMUNITY COLLEGE DISTRIC TRUSTEE':
                    'SANTA BARBARA COMMUNITY COLLEGE TRUSTEE',
                'ISLA VISTA RECREATION AND PARK DISTRICTSHORT TERM':
                    'ISLA VISTA RECREATION AND PARK DISTRICT SHORT TERM',
                'SANTA MARIA JOINT UNION HIGH SCHOOL DISTRICT':
                    'SANTA MARIA JOINT UNION HIGH SCHOOL',
                'ALLAN HANCOCK JOINT COLLEGE DISTRICT TRUSTEE':
                    'ALLAN HANCOCK JOINT COLLEGE TRUSTEE',
                'CITY OF GOLETA CITY COUNCIL':
                    'CITY OF GOLETA CITY COUNCIL MEMBER',
                'CITY OF LOMPOC CITY COUNCIL':
                    'CITY OF LOMPOC CITY COUNCIL MEMBER',
                'ISLA VISTA COMMUNITY SERVCIES DISTRICT':
                    'ISLA VISTA COMMUNITY SERVICES DISTRICT',
                'VANDENBERG VILLAGE COMMUNITY SERVICES DST':
                    'VANDENBERG VILLAGE COMMUNITY SERVICES'
                }
stb = ReplaceInCol(df = stb,
                   replacements = replacements,
                   idCol = 'office',
                   targetCol = 'office'
                  )

stb.candidate = stb.candidate.str.replace('   ',' ',regex=True)
stb.candidate = stb.candidate.str.replace('  ',' ',regex=True)
stb.candidate = stb.candidate.str.replace('_','',regex=True)
stb.candidate = stb.candidate.str.replace(';','',regex=True)
replacements = {
                'DONA)_D J TRUMP': 'DONALD J TRUMP',
                'DONA)D J TRUMP': 'DONALD J TRUMP',
                'DONALD 1 TRUMP': 'DONALD J TRUMP',
                'GLORIA LA WA': 'GLORIA LA RIVA',
                'HOWIE HAWONS': 'HOWIE HAWKINS',
                'JOSEPH R BLIDEN': 'JOSEPH R BIDEN',
                'ROQUE "ROCIY\'\' DE LA FUENTE GUERRA': 'ROQUE DE LA FUENTE',
                "ROQUE 'ROCK  DE LA FUENTE GUERRA": 'ROQUE DE LA FUENTE',
                'WRITE IN I': 'WRITE-IN',
                'DONALD J TROMP': 'DONALD J TRUMP',
                'DONALD _1 TRUMP': 'DONALD J TRUMP',
                'GLOR A LA RIVA': 'GLORIA LA RIVA',
                'GLORIA 14 R1VA': 'GLORIA LA RIVA',
                'HOVVIE HAWKINS': 'HOWIE HAWKINS',
                'HOWIE HAOKINS': 'HOWIE HAWKINS',
                "JO JORGENSEN '": 'JO JORGENSEN',
                'JOSEOH R BIDEN': 'JOSEPH R BIDEN',
                'JOSEPH R BID,EN': 'JOSEPH R BIDEN',
                'JOSEPH R EIDEN': 'JOSEPH R BIDEN',
                'JOSEPH R SIDEN': 'JOSEPH R BIDEN',
                'JOSEPH R1BIDEN': 'JOSEPH R BIDEN',
                'OVER IVOTES': 'OVERVOTES',
                'OVER VOLES': 'OVERVOTES',
                'OVER VOTES': 'OVERVOTES',
                'ROQUE "ROCKY" DE LA FUENTE GUERRA': 'ROQUE DE LA FUENTE',
                'ROQUE "ROCKY\' DE LA FUENTE GUERRA': 'ROQUE DE LA FUENTE',
                "ROQUE 'ROCK    DE LA FUENTE GUERRA": 'ROQUE DE LA FUENTE',
                "ROQUE 'ROCK DE LA FUENTE GUERRA": 'ROQUE DE LA FUENTE',
                "UNDER 'LIOTES": 'UNDERVOTES',
                'UNDER V+S': 'UNDERVOTES',
                'UNDER VOTES': 'UNDERVOTES',
                'WRITE IN': 'WRITE-IN',
                'WRITE IN  I': 'WRITE-IN',
                'ANDY CALDINELL': 'ANDY CALDWELL',
                'WRITE-IN1': 'WRITE-IN',
                "AND)L' CALDWELL": 'ANDY CALDWELL',
                'SAL14D CARBAJAL': 'SALUD CARBAJAL',
                'SALUD CARBMAL': 'SALUD CARBAJAL',
                'UNDEI VOTES': 'UNDERVOTES',
                'WRITE-1N': 'WRITE-IN',
                'S MONIQUE ILIMON': 'S MONIQUE LIMON',
                'S MONIQUE ILIMON': 'S MONIQUE LIMON',
                'GARY 4 MICHAELS': 'GARY J MICHAELS',
                'GARY J 11/QCHAELS': 'GARY J MICHAELS',
                'GARY J MICIFIAELS': 'GARY J MICHAELS',
                'OVER YOTES': 'OVERVOTES',
                'UNDER VOTTES': 'UNDERVOTES',
                'STEVE BENNOT': 'STEVE BENNETT',
                'UNDER VO!TES': 'UNDERVOTES',
                'UNDER YOKES': 'UNDERVOTES',
                'WRITE- N': 'WRITE-IN',
                'WRITE-IN 1': 'WRITE-IN',
                'OVER VOTS': 'OVERVOTES',
                'OVER ,TOTES': 'OVERVOTES',
                'JORDAN CONNINGHAM': 'JORDAN CUNNINGHAM',
                'BRPCE PORTER': 'BRUCE PORTER',
                'UNDER N4OTES': 'UNDERVOTES',
                'UNDER VOIES': 'UNDERVOTES',
                'WR1TE-1NI': 'WRITE-IN',
                'WRITE-IN/': 'WRITE-IN',
                'LOU SEG4L': 'JOE SEGAL',
                'MICHELLE D WERD': 'MICHELLE DE WERD',
                'WELDON (JOE) HOWELL': 'WELDON JOE HOWELL',
                'WELDON 00E) HOWELL': 'WELDON JOE HOWELL',
                'JOE SEGAL': 'LOU SEGAL',
                'CAGE J ENGLMDER': 'CAGE J ENGLANDER',
                'CAGE J ENOANIDER': 'CAGE J ENGLANDER',
                'PETER R !MCDOUGALL': 'PETER R MACDOUGALL',
                'PETER R MAODOUGALL': 'PETER R MACDOUGALL',
                'OVER VOTLES': 'OVERVOTES',
                'WRITE-I N': 'WRITE-IN',
                'VERONICA G4ARDO': 'VERONICA GALLARDO',
                'VERONICA GALARDO': 'VERONICA GALLARDO',
                'VERONICA/ GALARDO': 'VERONICA GALLARDO',
                'ERIN J GUERENA': 'ERIN J GURENA',
                'ANNA EVEIRETT': 'ANNA EVERETT',
                'CELESTE BARBER': 'CELESTE BARBARA',
                'CELESTEI3ARBER': 'CELESTE BARBARA',
                'ERIN J GURE1NA': 'ERIN J GURENA',
                'ERIN JJ GURENA': 'ERIN J GURENA',
                'OVER VOES': 'OVERVOTES',
                'OVERLVOTES': 'OVERVOTES',
                'UNOR VOTES': 'UNDERVOTES',
                'WRITE-IN I': 'WRITE-IN',
                'WRITE-INI': 'WRITE-IN',
                "UNDER 'KTES": 'UNDERVOTES',
                'UNDER VBTES': 'UNDERVOTES',
                'SUZANNE LEVY1': 'SUZANNE LEVY',
                'ROBERT IL: MILLER': 'ROBERT K MILLER',
                'ROBERT K M LLER': 'ROBERT K MILLER',
                'BRIAN CAMPBIELL': 'BRIAN CAMPBELL',
                'UNDER YOTES': 'UNDERVOTES',
                'WRIT-IN': 'WRITE-IN',
                'WRITE-INT': 'WRITE-IN',
                'JACCOELINE REID': 'JACQUELINE REID',
                'JACQUEONE REID': 'JACQUELINE REID',
                'ELRAWD JDFIN MACLEARN': 'ELRAWD JOHN MACLEARN',
                'ELRAWD JOHN VIACLEARN': 'ELRAWD JOHN MACLEARN',
                'WENDY S1MS-MOTEN': 'WENDY SIMS-MOTEN',
                'WENDY SI1MS-MOTEN': 'WENDY SIMS-MOTEN',
                'WENDY §,IMS-MOTEN': 'WENDY SIMS-MOTEN',
                'VIRGINIA ALVMEZ': 'VIRGINIA ALVAREZ',
                'VIRGINIA IALVAREZ': 'VIRGINIA ALVAREZ',
                'LAURA CAPS': 'LAURA CAPPS',
                'LAURA OPPS': 'LAURA CAPPS',
                'MON1E DEVV1T': 'MONIE DEWIT',
                'MON1E DEWIT': 'MONIE DEWIT',
                'MON1E DEW1T': 'MONIE DEWIT',
                'MONIE DEW1T': 'MONIE DEWIT',
                'MONIE DEWII': 'MONIE DEWIT',
                'MONIG DEWIT': 'MONIE DEWIT',
                'VICKI BEN-Y4COV': 'VICKI BEN-YAACOV',
                'DEVANY BECOLER': 'DEVANY BECHLER',
                'DEVANYBECHLER': 'DEVANY BECHLER',
                'PATRICIA "MAX" RORTY': 'PATRICIA MAX RORTY',
                'PATRICIA "MAX\' RORTY': 'PATRICIA MAX RORTY',
                'PATRICIA! "MAX" RORTY': 'PATRICIA MAX RORTY',
                'SHOLEH 4AHANGIR': 'SHOLEH JAHANGIR',
                'SHOLEH JAHANGR': 'SHOLEH JAHANGIR',
                'ROGER S AC1EVES': 'ROGER S ACEVES',
                'ROGER S AOEVES': 'ROGER S ACEVES',
                'BRUCE VVALLACH': 'BRUCE WALLACH',
                'BRUCE WAL00,CH': 'BRUCE WALLACH',
                'JUSTIN SHOOS': 'JUSTIN SHORES',
                'JUSTIN SHORS': 'JUSTIN SHORES',
                'KYLE RICHAROS': 'KYLE RICHARDS',
                'STUART KASOIN': 'STUART KASDIN',
                'BLANCHE M "GRACE" WALLACE': 'BLANCHE M GRACE WALLACE',
                'BLANCHE M "IGRACE" WALLACE': 'BLANCHE M GRACE WALLACE',
                'BLANCHE M \'IGRACE" WALLACE': 'BLANCHE M GRACE WALLACE',
                'GEORGE EMEIRSON': 'GEORGE EMERSON',
                'GEORGE W EMERSON': 'GEORGE EMERSON',
                '!BOB WAGENECK': 'BOB WAGENECK',
                'BOB WAGENE1CK': 'BOB WAGENECK',
                'BOB WI4GENECK': 'BOB WAGENECK',
                'EDWARD FULLER ,': 'EDWARD FULLER',
                'UNDER NIFOTES': 'UNDERVOTES',
                'WRITE-IIN': 'WRITE-IN',
                'BILL ROSEN 1': 'BILL ROSEN',
                'LAUREN HAN1SON': 'LAUREN HANSON',
                'LAUREN) HANSON': 'LAUREN HANSON',
                'LOREN MASON': 'LAUREN HANSON',
                'LOREN MASOO': 'LAUREN HANSON',
                'PHEBV MANSUR': 'PHEBE MANSUR',
                'SHELDON BC(SIO': 'SHELDON BOSIO',
                'WRITE-IV': 'WRITE-IN',
                'UNDER VCTES': 'UNDERVOTES',
                'UNDELRVOTES': 'UNDERVOTES',
                'UNDER4/OTES': 'UNDERVOTES',
                'OVEDVOTES': 'OVERVOTES',
                'OVE VOTES': 'OVERVOTES',
                'OVER VOIES': 'OVERVOTES',
                'OVER YAKS': 'OVERVOTES',
                'OVER VBTES': 'OVERVOTES',
                'UNDELR VOTES': 'UNDERVOTES',
                'OVER ,ITOTES': 'OVERVOTES',
                'UNDER MOTES': 'UNDERVOTES',
                'UNDER VOES': 'UNDERVOTES',
                'OVER V+S': 'OVERVOTES',
                'UNDER VLOTES': 'UNDERVOTES',
                'OVER •ITOTES': 'OVERVOTES',
                'UNDER)LIOTES': 'UNDERVOTES',
                'B0NDS-0': 'BONDS-NO',
                'BONDS-YES I': 'BONDS-YES',
                'UNDER VPTES': 'UNDERVOTES',
                'ALICE M PATIN°': 'ALICE M PATINO',
                'WILLIAM "WILL" SMITH': 'WILLIAM WILL SMITH',
                'OVER TOTES': 'OVERVOTES',
                'JIM IICHARDSON': 'JIM RICHARDSON',
                'JEFF1HALL': 'JEFF HALL',
                'UNDER{ VOTES': 'UNDERVOTES',
                'SUZANS1E LEVY': 'SUZANNE LEVY',
                'ALEXANDER MURK1SON': 'ALEXANDER MURKISON',
                'JAKIT BLEVINS': 'JANET BLEVINS',
                'MARTIN OASEY': 'MARTIN CASEY',
                'OVE IR VOTES': 'OVERVOTES',
                'TO BLANCO': 'TOM BLANCO',
                'TOM BIANCO': 'TOM BLANCO',
                'WILLIAM "BILL" HEALTH': 'WILLIAM BILL HEALTH',
                'WILLIAM \'ON" HEALTH': 'WILLIAM BILL HEALTH',
                'KATO! FROEMMING': 'KATHI FROEMMING',
                'UNDERLVOTES': 'UNDERVOTES',
                'JENE1LLE OSBORNE': 'JENELLE OSBORNE',
                'JENELLE SBORNE': 'JENELLE OSBORNE',
                'UNDERL VOTES': 'UNDERVOTES',
                'WR1TE-IN': 'WRITE-IN',
                'VICTOR MEGA': 'VICTOR VEGA',
                'JAMS IAN MOSBY': 'JAMES IAN MOSBY',
                'CHRI$TOPHER DIAZ': 'CHRISTOPHER DIAZ',
                'OVER ,J/OTES': 'OVERVOTES',
                'GABFOELLA HAS': 'GABRIELLA HAAS',
                'JORI*N M QUIVEY': 'JORDAN M QUIVEY',
                'JORDAN M (*IVEY': 'JORDAN M QUIVEY',
                'TREVOR PATTESON': 'TREVOR PATTISON',
                ', UNDER VOTES': 'UNDERVOTES',
                'JOHN ABRAHAM ,POWELL': 'JOHN ABRAHAM POWELL',
                'JOHNIABRARAM POWELL': 'JOHN ABRAHAM POWELL',
                'SYLVIA EASTDN': 'SYLVIA EASTON',
                'SYLVIA EPISTON :': 'SYLVIA EASTON',
                'SYLVIIA EASTON': 'SYLVIA EASTON',
                'UNDERFVOTES': 'UNDERVOTES',
                'OVER VFTES': 'OVERVOTES',
                'MICHAEL N LEE I': 'MICHAEL N LEE',
                '-DORINNE LEE JOHNSON': 'DORINNE LEE JOHNSON',
                '-WRITE-IN': 'WRITE-IN',
                'DON EVERSOLL,': 'DON EVERSOLL',
                'EDWIN C MAOT1N, JR': 'EDWIN C MARTIN, JR',
                'GARY FULLER!': 'GARY FULLER',
                'DORINNE LEE1JOHNSON': 'DORINNE LEE JOHNSON',
                'DORINNE1LEE JOHNSON': 'DORINNE LEE JOHNSON',
                'BONDS-YS': 'BONDS-YES',
                'BONES-YES': 'BONDS-YES',
                'BONOS-NO': 'BONDS-NO',
                'UNDER VQTES': 'UNDERVOTES',
                'AARON SMITO': 'AARON SMITH',
                'CRAIG QOOK': 'CRAIG COOK',
                'JAIME D AMOND': 'JAIME DIAMOND',
                'ROGEL10 DELAGADO': 'ROGELIO DELGADO',
                'ROGELIO DE(AGADO': 'ROGELIO DELGADO',
                'WRITE-10': 'WRITE-IN',
                'ROGELIO DELAGADO': 'ROGELIO DELGADO',
                'JOSE JUAN !BARRA': 'JOSE JUAN IBARRA',
                'CALISSE1M COURTNEY': 'CALISSE M COURTNEY',
                'ERICA J4NE FLORES': 'ERICA JANE FLORES',
                'WRITE-III4': 'WRITE-IN',
                'CHRIS BOVVYER': 'CHRIS BOWYER',
                'MIKE SM1YUN': 'MIKE SMIYUN',
                "JOSE JOAN 'BARRA": 'JOSE JUAN IBARRA',
                "JOSE JUAN 'BARRA": 'JOSE JUAN IBARRA',
                'LUCY P1OILLA1': 'LUCY PADILLA',
                'PETER INRIGHT': 'PETER WRIGHT',
                'WRITE-I1': 'WRITE-IN',
                'JEANINE ROBITAILLE-FILIPPIN': 'JANINE ROBITAILLE-FILIPPIN',
                'MARK IMPNTIRE': 'MARK MCINTIRE',
                'WRITE*': 'WRITE-IN',
                'JEFF "VEIN BENDER': 'JEFF WEINBENDER',
                'ROGEILIO DELAGADO': 'ROGELIO DELGADO',
                'UNDER VO[TES': 'UNDERVOTES',
                'UNDER VK)TES': 'UNDERVOTES',
                'WRITON': 'WRITE-IN',
                'ARISTON JULI1AN': 'ARISTON JULIAN',
                'ANTONIO "TONY" RAMIREZ': 'ATONIO TONY RAMIREZ',
                'TOM MURRAY1': 'TOM MURRAY',
                'ED ANDRIS*': 'ED ANDRISEK'
                }
stb = ReplaceInCol(df = stb,
                   replacements = replacements,
                   idCol = 'candidate',
                   targetCol = 'candidate'
                  )
CheckNames(new = stb,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = stb,
               merged = merged,
               officesToCheck = stb["office"].unique(),
               LOG_FILE = LOG_FILE)

stb = AssignBaseOfficeCats(stb)
stb.loc[stb.office == 'CITY OF SANTA MARIA TREASURER', 'office_cat'] = 'CITY'
stb.loc[stb.office == 'SANTA YNEZ VALLEY UNION HIGH',
                      'office_cat'] = 'EDUCATION'
ReportOfficeCats(new = stb, LOG_FILE = LOG_FILE)

merged = MergeData(stb, merged, VERBOSE)

#Santa Clara county
stc = AutoStandardize(pd.read_csv(DIR+"santa_clara_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Santa Clara manual')
merged = MergeData(stc, merged, VERBOSE)
WrapState(LOG_FILE, "Santa Clara")

#Mendocino county
me = AutoStandardize(pd.read_csv(DIR+"mendocino_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Mendocino manual')
me = AssignBaseOfficeCats(me)
cats = {
        'COUNTY SUPERVISOR': 'COUNTY',
        'MENDOCINO COUNTY SUPERVISOR': 'COUNTY',
        'CITY OF UKIAH TREASURER': 'SERVICES',
        'UKIAH VALLEY SANITATION DISTRICT, DIRECTOR': 'SERVICES',
        'MENDOCINO COAST REC AND PARK DISTRICT, DIRECTOR': 'SERVICES',
        'PORTER VALLEY IRRIGATION DISTRICT, DIRECTOR': 'SERVICES'
        }
for c in cats.keys():
    me.loc[me.office == c, 'office_cat'] = cats[c]
me.loc[me.office.str.contains(
        'ARENA UNION ELEM/PT ARENA UNIO HS DIST, GOVERN'),
        'office_cat'] = 'EDUCATION'
ReportOfficeCats(new = me, LOG_FILE = LOG_FILE)
merged = MergeData(me, merged, VERBOSE)

#San Joaquin county
sj = AutoStandardize(pd.read_csv(DIR+"san_joaquin_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: San Joaquin manual')
sj = AssignBaseOfficeCats(sj)
sj.loc[sj.office.str.contains('CONGRESSIONAL'), 'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('RIPON UNIFIED SCHOOL'), 'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('TRACY MAYOR, TRACY CITY COUNCIL, TRACY'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('ASSEMBLY DISTRICT 9, LODI CITY COUNCIL'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('LODI CITY COUNCIL DISTRICT 5, LODI UNIFIED'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office == 'SO SAN JOAQUIN IRRIGATION DISTRICT 3',
       'office_cat'] = 'SERVICES'
sj.loc[sj.office.str.contains('LODI CITY COUNCIL DISTRICT 5, LODI UNIFIED'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('TRUSTEE AREA 1, STOCKTON CITY'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('MANTECA CITY COUNCIL, MEASURE'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('LATHROP MAYOR, LATHROP CITY COUNCIL'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('GALTJOINT UNION ELEMENTARY SCHOOL DISTRICT'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('ALL COUNTYWIDE'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('UNIFIED SCHOOL DISTRICT, MEASURE'),
       'office_cat'] = 'MULTI'
sj.loc[sj.office.str.contains('MOUNTAIN HOUSE COMMUNITY SERVICES'),
       'office_cat'] = 'MULTI'
ReportOfficeCats(new = sj, LOG_FILE = LOG_FILE)
merged = MergeData(sj, merged, VERBOSE)

#Solano county
sl = AutoStandardize(pd.read_csv(DIR+"solano_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'California: Solano manual')
sl = AssignBaseOfficeCats(sl)
ReportOfficeCats(new = sl, LOG_FILE = LOG_FILE)
merged = MergeData(sl, merged, VERBOSE)


################################################################################
# CO El Paso county manual
################################################################################
DIR = "../state_data/CO/ready/"

co = AutoStandardize(pd.read_csv(DIR+"CO-elpaso-cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Colorado: El Paso manual')
co = co.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
})

#Standardize info
co = co.drop(["date"], axis = 1)
co["method"] = "MACHINE"

co.loc[co.office == 'COUNTY COMISSIONER', 'office'] = 'COUNTY COMMISSIONER'

CheckNames(new = co,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = co,
               merged = merged,
               officesToCheck = co["office"].unique(),
               LOG_FILE = LOG_FILE)
co = AssignBaseOfficeCats(co)
ReportOfficeCats(new = co, LOG_FILE = LOG_FILE)
merged = MergeData(co, merged, VERBOSE)


################################################################################
# CO RLA
################################################################################
#Same directory

co = AutoStandardize(pd.read_csv(DIR+"co_rla_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Colorado: statewide RLA')
CheckNames(new = co,
           merged = merged,
           LOG_FILE = LOG_FILE)
merged = MergeData(co, merged, VERBOSE)

WrapState(LOG_FILE, 'CO RLA')


################################################################################
# DC Manual
################################################################################
DIR = "../state_data/DC/ready/"

dc = AutoStandardize(pd.read_csv(DIR+"dc_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'DC: district-wide Manual')
CheckNames(new = dc,
           merged = merged,
           LOG_FILE = LOG_FILE)
dc = AssignBaseOfficeCats(dc)

office_to_cat = {
             'MEMBER OF THE COUNCIL OF THE DISTRICT OF COLUMBIA': 'CITY'
                }
for f in office_to_cat:
    dc.loc[dc.office == f, 'office_cat'] = office_to_cat[f]

ReportOfficeCats(new = dc, LOG_FILE = LOG_FILE)

merged = MergeData(dc, merged, VERBOSE)


################################################################################
# DE
################################################################################
DIR = "../state_data/DE/ready/"

de = AutoStandardize(pd.read_csv(DIR+"de_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Delaware manual')

#Standardize info
de["division"] = de["location"] + "_" + de["machine"]
de = de.rename(columns = {
    "literal_diff": "issues",
})
de = de.drop(["margin_diff", "date", "location", "machine"], axis = 1)
de["method"] = "MANUAL"

de = AssignBaseOfficeCats(de)
office_to_cat = {
             'INSURANCE COMMISSIONER': 'REGULATORY'
                }
for f in office_to_cat:
    de.loc[de.office == f, 'office_cat'] = office_to_cat[f]
ReportOfficeCats(new = de, LOG_FILE = LOG_FILE)

merged = MergeData(de, merged, VERBOSE)


################################################################################
# FL
################################################################################
DIR = "../state_data/FL/ready/"

fl = AutoStandardize(pd.read_csv(DIR+"fl_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Florida statewide (already merged within state)')
#Standardize variables and values
fl = fl.rename(columns = {
    "type": "method",
    "subset": "division"
})
fl.loc[fl["method"] == "AUTOMATIC", "method"] = "MACHINE"

fl.office = fl.office.str.replace(r'?_XDC00_','TI')
replacements = {
    #https://www.hendryelections.org/LinkClick.aspx?fileticket=47MVaHtBttc%3D
    'LEHIGH': 'LEHIGH ACRES MUNICIPAL SERVICES IMPROVEMENT DISTRICT SEAT 1',
    'LOCAL': 'PASEO COMMUNITY DEVELOPMENT DISTRICT SEAT 4',
    'PRESIDENT AND VICE PRESIDENT': 'US PRESIDENT',
    'PRESIDENT/VICE PRESIDENT (VOTE FOR 1)': 'US PRESIDENT',
    'PRESIDENT AND VICE PRESIDENT (VOTE FOR 1)': 'US PRESIDENT',
    'PRESIDENT (VOTE FOR 1)': 'US PRESIDENT',
    'SHERIF (VOTE FOR 1)': 'SHERIFF (VOTE FOR 1)',
    'FORT PIERCE DISTRICT 1 (VOTE FOR 1)': \
            'FORT PIERCE CITY COMMISSION DISTRICT 1 (VOTE FOR 1)',
    'FORT PIERCE DISTRICT 2 (VOTE FOR 1)':\
            'FORT PIERCE CITY COMMISSION DISTRICT 2 (VOTE FOR 1)',
    'PORTOFINO SHORES SEAT 1 (VOTE FOR 1)':\
            'PORTOFINO SHORES, CDD SEAT 1 (VOTE FOR 1)',
    'PORTOFINO SHORES SEAT 2 (VOTE FOR 1)':\
            'PORTOFINO SHORES, CDD SEAT 2 (VOTE FOR 1)',
    'RIVER PLACE ON THE ST, LUCIE, SEAT 4 (VOTE FOR 1)': \
        'RIVER PLACE ON THE ST LUCIE, CDD SEAT 4 (VOTE FOR 1)',
    'RIVER PLACE ON THE ST LUCIE, SEAT 5 (VOTE FOR 1)': \
        'RIVER PLACE ON THE ST LUCIE, CDD SEAT 5 (VOTE FOR 1)',
    'TRADITION 6, SEAT 2 (VOTE FOR 1)': \
        'TRADITION 6, CDD SEAT 2 (VOTE FOR 1)'        
    }
fl = ReplaceInCol(df = fl,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )

fl = SplitColByStr(df = fl,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DISTRICT ([0-9]*)'
                   )
fl = SplitColByStr(df = fl,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = ', D([0-9]*)'
                   )

#Replace whole word if substring encountered
substr_reps = {
               'STATE ATIORNEY': 'STATE ATTORNEY',
               'REPRESENTATIVE IN CONGRESS': 'US HOUSE',
               'REP IN CONGR': 'US HOUSE',
               'STATE REPRESENTATIVE': 'STATE HOUSE',
               'STATE REP': 'STATE HOUSE',
               'STATE SENATOR': 'STATE SENATE',
              }
for rep in substr_reps:
    fl.loc[fl.office.str.contains(rep), 'office'] = substr_reps[rep]

#Replace only substring
substr_subs = {
               'SUP CRT': 'SUPERIOR COURT',
               'DIST CRT': 'DISTRICT COURT',
               'COUNTY COMM': 'COUNTY COMMISSIONER',
               'CITY COMM': 'CITY COMMISSIONER'
              }
for rep in substr_subs:
    fl.office = fl.office.str.replace(rep, substr_subs[rep])


#Now candidate replacements
replacements = {
                'JIM KA!LINGER': 'JIM KALLINGER',
                'JOSHUE HICKS': 'JOSHUA HICKS',
                'SHOAF': 'JASON SHOAF',
                'KHAN': 'TAYMOUR KHAN',
                'BARBIR HIGGINBOTHAM': 'BARBIE HIGGINBOTHAM',
                'WRITEIN': 'WRITE-IN',
                'BIDEN, JOE R': 'JOSEPH R BIDEN',
                'RICK HURTS': 'RICK HURST',
                'NO/NO': 'NO',
                'YES/S1': 'YES',
                'YES/51': 'YES',
                'COHEN': 'JO JORGENSEN',
                'DON BLANKENSHIP / WILLIAM MOHR': 'DON BLANKENSHIP',
                'DON BLANKENSHIP/WILLIAM MOHR': 'DON BLANKENSHIP',
                'DONALD J TRUMP! MICHAEL R PENCE': 'DONALD J TRUMP',
                'DONALD J TRUMP/MICHAEL R PENCE': 'DONALD J TRUMP',
                'G RICHARDSON': 'ROQUE "ROCKY" DE LA FUENTE',
                'GLORIA LA RIVA / SUNIL FREEMAN': 'GLORIA LA RIVA',
                'GLORIA LA RIVA/SUNIL FREEMAN': 'GLORIA LA RIVA',
                'GLORIA LA RIVE': 'GLORIA LA RIVA',
                'HOWIE HAWKINS / ANGELA NICOLE WALKER': 'HOWIE HAWKINS',
                'HOWIE HAWKINS/ANGELA NICOLE WALKER': 'HOWIE HAWKINS',
                'JO JORGENSEN /JEREMY "SPIKE" COHEN': 'JO JORGENSEN',
                'JO JORGENSEN/JEREMY "SPIKE" COHEN': 'JO JORGENSEN',
                'JOSEPH R BIDEN / KAMALA D HARRIS': 'JOSEPH R BIDEN',
                'JOSEPH R BIDEN/KAMALA D HARRIS': 'JOSEPH R BIDEN',
                'ROQUE "ROCKY" DE LA FUENTE - DARCY G RICHARDSON':\
                        'ROQUE "ROCKY" DE LA FUENTE',
                'ROQUE "ROCKY" DE LA FUENTE/DARCY G RICHARDSON':\
                        'ROQUE "ROCKY" DE LA FUENTE',
                'WALKER': 'HOWIE HAWKINS'
               }
fl = ReplaceInCol(df = fl,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
fl.candidate = fl.candidate.str.replace(r'?_XDC00_','TT', regex=False)
fl.candidate = fl.candidate.str.replace(r'MULLIZ','MUNIZ', regex=False)

#Fix a miscat in the raw data
fl.loc[(fl.candidate == 'KEVIN ROBERTS') &
        (fl.office == 'US HOUSE'), 'office'] = 'COUNTY COMMISSIONER'
fl.loc[(fl.candidate == 'CARMELLO GARCIA') &
        (fl.office == 'US HOUSE'), 'office'] = 'COUNTY COMMISSIONER'

CheckNames(new = fl,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = fl,
               merged = merged,
               officesToCheck = fl["office"].unique(),
               LOG_FILE = LOG_FILE)

fl = AssignBaseOfficeCats(fl)
office_to_cat = {
                 'HOLMES BEACH COMMISSIONER': 'CITY',
                 #Upon searching, these are all city councils
                 'COUNCIL MEMBER': 'CITY',
                 'PROPERTY APPRAISER': 'SERVICES',
                 'BEACH MOSQUITO CONTROL DISTRICT SEAT 3': 'SERVICES',
                 'TAX COLLECTOR (VOTE FOR 1)': 'SERVICES',
                 'PROPERTY APPRAISER (VOTE FOR 1)': 'SERVICES',
                 'LEON SOIL, S5 (VOTE FOR 1)': 'SERVICES'
                }
for f in office_to_cat:
    fl.loc[fl.office == f, 'office_cat'] = office_to_cat[f]
refs = ['CONSTITUTIONAL AMENDMENT ARTICLE',
        'CONSTITUTIONAL AMENDMENTS ARTICLE',
        "RAISING FLORIDA'S MINIMUM WAGE",
        'VOTER APPROVAL OF CONSTITUTIONAL',
        'LIMITATION ON HOMESTEAD ASSESSMENTS',
        'AD VALOREM TAX DISCOUNT FOR SPOUSES',
        'DEPARTMENT HEAD RIGHT TO',
        'ANNUAL REVIEW OF COUNTY',
        'CODE OF ETHICS AS PROPOSED BY',
        'USE OF SURPLUS YEAR-END FUNDS',
        'SECTION 105',
        'SECTION 305',
        'SECTION 306',
        'SECTION 404',
        'SECTION 505',
        'SECTION 512',
        'SECTION 513',
        'SECTION 603',
        'CONSTITUTIONAL AMENDMENT',
        'POMONA PARK CHARTER AMENDMENT'
        ]
for ref in refs:
    fl.loc[fl.office.str.contains(ref), 'office_cat'] = 'REFERENDUM'
#While it's hard to think of what CDD could be a substring of, let's be cautious
fl.loc[fl.office.str.contains('SUPERVISOR, .* CDD'),\
                              'office_cat'] = 'SERVICES'
fl.loc[fl.office.str.contains('CDD, '), 'office_cat'] = 'SERVICES'
fl.loc[fl.office.str.contains(', CDD'), 'office_cat'] = 'SERVICES'
fl.loc[fl.office.str.contains('[0-9] - AMEND'), 'office_cat'] = 'REFERENDUM'
fl.loc[fl.office.str.contains('[0-9]- AMEND'), 'office_cat'] = 'REFERENDUM'

ReportOfficeCats(new = fl, LOG_FILE = LOG_FILE)

merged = MergeData(fl, merged, VERBOSE)


################################################################################
# GA RLA
################################################################################
DIR = "../state_data/GA/ready/"

ga = AutoStandardize(pd.read_csv(DIR+"GA_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Georgia statewide RLA')
CheckNames(new = ga,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ga,
               merged = merged,
               officesToCheck = ga["office"].unique(),
               LOG_FILE = LOG_FILE)
ga = AssignBaseOfficeCats(ga)
ReportOfficeCats(new = ga, LOG_FILE = LOG_FILE)
merged = MergeData(ga, merged, VERBOSE)


################################################################################
# IA
#
# The Iowa audit is a bit of an edge case. It is an office-level total number of
# ballots. That is different from the case of Texas, which is just an overall
# number of ballots. We judge that it is most straightforward to call Iowa
# ballot-level data and include it in the merged file of all audits, but care
# must be taken not to double-count it in any ballot-level analyses.
################################################################################
DIR = "../state_data/IA/ready/"

ia = AutoStandardize(pd.read_csv(DIR+"Iowa_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Iowa statewide')
ia = ia.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference"
})
#Standardize variables and values
ia["method"] = "MANUAL"
ia = ia.drop(["date"], axis=1)
CheckNames(new = ia,
           merged = merged,
           LOG_FILE = LOG_FILE)
ia = AssignBaseOfficeCats(ia)
ReportOfficeCats(new = ia, LOG_FILE = LOG_FILE)
merged = MergeData(ia, merged, VERBOSE)


################################################################################
# ID
################################################################################
DIR = "../state_data/ID/ready/"
ih = AutoStandardize(pd.read_csv(DIR+"idaho_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Idaho: county-level manual')

ih.difference = ih.audited - ih.original

CheckNames(new = ih,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ih,
               merged = merged,
               officesToCheck = ih["office"].unique(),
               LOG_FILE = LOG_FILE)
ih = AssignBaseOfficeCats(ih)
ReportOfficeCats(new = ih, LOG_FILE = LOG_FILE)
merged = MergeData(ih, merged, VERBOSE)


################################################################################
# MA
################################################################################
DIR = "../state_data/MA/ready/"

#US HOUSE
ma = AutoStandardize(pd.read_csv(DIR+"Congress_MA.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Massachusetts: US House manual')
ma = ma.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
ma = ma.drop(["date"], axis = 1)
ma["method"] = "MANUAL"

#Standardize all write-in candidates to the value `WRITE-IN`
ma.loc[ma.candidate.str.contains('WRITE-IN'), 'candidate'] = 'WRITE-IN'

CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)

#PRESIDENT AND US SENATE
ma = AutoStandardize(pd.read_csv(DIR+"President and US Senate Recleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Massachusetts: President and US Senate manual')
ma = ma.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
ma = ma.drop(["date"], axis = 1)
ma["method"] = "MANUAL"
replacements = {'HOWARD G HAWKINS': 'HOWIE HAWKINS'}
ma = ReplaceInCol(df = ma,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)

#QUESTION 1
ma = AutoStandardize(pd.read_csv(DIR+"Question 1_MA.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Massachusetts: Question 1 manual')
ma = ma.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
ma = ma.drop(["date"], axis = 1)
ma["method"] = "MANUAL"
CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)

#STATE REP
ma = AutoStandardize(pd.read_csv(DIR+"State Rep_MA.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Massachusetts: State Representative manual')
ma = ma.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
ma = ma.drop(["date"], axis = 1)
ma["method"] = "MANUAL"
ma.office = 'STATE HOUSE'
CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)

#STATE SENATE
ma = AutoStandardize(pd.read_csv(DIR+"State Senator_MA.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Massachusetts: State Senator manual')
ma = ma.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
ma = ma.drop(["date"], axis = 1)
ma["method"] = "MANUAL"
ma.office = 'STATE SENATE'
CheckNames(new = ma,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ma,
               merged = merged,
               officesToCheck = ma["office"].unique(),
               LOG_FILE = LOG_FILE)
ma = AssignBaseOfficeCats(ma)
ReportOfficeCats(new = ma, LOG_FILE = LOG_FILE)
merged = MergeData(ma, merged, VERBOSE)


################################################################################
# MD
################################################################################
DIR = "../state_data/MD/ready/"

md = AutoStandardize(pd.read_csv(DIR+"md_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Maryland statewide (already merged within state)')

#Office and district
md.office = md.office.str.replace('(\(VOTE.*)','',regex=True)
md.office = md.office.str.strip()
#Mind the order: more specific must go later
offices_to_split = [
                    'DISTRICT (.*)',
                    '(COMMISSIONER DISTRICT .*)',
                    '(COUNCILMANIC DISTRICT .*)',
                    '([0-9] COUNCILMANIC DISTRICT)$',
                    '([0-9] COMMISSIONER DISTRICT)$',
                    '(AT LARGE)',
                    'BOARD OF EDUCATION - ([0-9])$',
                    'BOARD OF EDUCATION -([0-9])$',
                    '([0-9] BOARD OF EDUCATION DISTRICT)',
                    '(BOARD OF EDUCATION DISTRICT .*)',
                   ]
for theString in offices_to_split:
    md = SplitColByStr(df = md,
                       oldColName = 'office',
                       newColName = 'district',
                       theString = theString
                       )

#Manual district assignments
office_to_dist = {
                  'COUNTY COUNCIL - 5': '5',
                  'COUNTY COUNCIL - 1': '1',
                  'COUNTY COUNCIL - 2 COUNCILMANIC DISTRICT 2': '2'
                 }
md = ReplaceInCol(df = md,
                  replacements = office_to_dist,
                  idCol = 'office',
                  targetCol = 'district'
                 )

#Now manually clean the office field
replacements = {
                'PRESIDENT - VICE PRES': 'US PRESIDENT',
                'PRESIDENT-VICE PRES': 'US PRESIDENT',
                'JUDGE COURT OF APPEALS APPELLATE CIRCUIT1': \
                    'JUDGE COURT OF APPEALS APPELLATE CIRCUIT 1',
                'JUDGE OF THE CIRCUIT COURT JUDICIAL CIRCUITS': \
                    'JUDGE OF THE CIRCUIT COURT JUDICIAL CIRCUIT 5',
                '2': 'QUESTION 2'
               }
md = ReplaceInCol(df = md,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
md.loc[md.office.str.contains('REPRESENTATIVE IN CONGRESS'),
       'office'] = 'US HOUSE'
md.loc[md.office.str.contains('MEMBER CITY COUNCIL'), 'office'] = \
        'MEMBER CITY COUNCIL'
md.loc[md.office.str.contains('COUNTY COUNCIL'), 'office'] = \
        'COUNTY COUNCIL'
md.loc[md.office.str.contains('REPRESENTATIVE IN CONGRESS'),
       'office'] = 'US HOUSE'
md.loc[md.office.str.contains('BOARD OF EDUCATION'), 'office'] = \
        'BOARD OF EDUCATION'
md.loc[md.office.str.contains('MAYOR OF HAGERSTOWN'), 'office'] = \
        'MAYOR OF HAGERSTOWN'
md.loc[md.office.str.contains('HAGERSTOWN COUNCIL'), 'office'] = \
        'HAGERSTOWN COUNCIL'
md.loc[md.office.str.contains('COUNCIL - CITY OF CUMBERLAND'), 'office'] = \
        'CITY OF CUMBERLAND COUNCIL'
md.district = md.district.str.strip()

#Candidates
replacements = {
  'BID EN-HA RRIS': 'JOSEPH R BIDEN',
  'BIDEN-HARRIS': 'JOSEPH R BIDEN',
  'HAWKINS-WALKER': 'HOWIE HAWKINS',
  'JORGENSEN-COHEN': 'JO JORGENSEN',
  'SEGAL-DE GRAAF': 'JEROME SEGAL',
  'TRUMP-PENCE': 'DONALD J TRUMP',
  'CA DUTCH RUPPERSBERGER': 'C A DUTCH RUPPERSBERGER',
  'CADUTCH RUPPERSBERGER': 'C A DUTCH RUPPERSBERGER',
  'JOHNNY RAY SAILING': 'JOHNNY RAY SALLING',
  'JOHNNY RAY SATING': 'JOHNNY RAY SALLING',
  'NEIL C PARROT': 'NEIL C PARROTT',
  'AGAINST THE CONSTITUTIONAL AMENDME': 'AGAINST THE CONSTITUTIONAL AMENDMENT',
  'AGAINST THE CONSTITUTIONAL AMENDME  1': 'AGAINST THE CONSTITUTIONAL AMENDMENT',
  'FOR THE CONSTTUTONAL AMENDMENT': 'FOR THE CONSTITUTIONAL AMENDMENT',
  'EDWARD BURROUGHS, HI': 'EDWARD BURROUGHS III',
  'EDWARD BURROUGHS, ILI': 'EDWARD BURROUGHS III',
  'INDIA LOCHS' : 'INDIA L OCHS',
  'JASON "MR 1" JOHNSON': 'JASON "MR J" JOHNSON',
  'JASON "MR1" JOHNSON': 'JASON "MR J" JOHNSON',
  'JASON "MR1"JOHNSON': 'JASON "MR J" JOHNSON',
  'LINDAM WIDMYER': 'LINDA M WIDMYER',
  'MICHAEL FRYER': 'MICHAEL FRYAR',
  'SUNILDASGUPTA': 'SUNIL DASGUPTA',
  'TIERNEY FARIAN DAVIS': 'TIERNEY FARLAN DAVIS',
  'SUZANNE B SINCEII': 'SUZANNE B SINCELL',
  'TOM HEFNER': 'TOM HEFFNER',
  'BIBIM BERRY': 'BIBI M BERRY',
  'BOB MEFLEY': 'BOB MEFFLEY',
  'NO KATHRYN GRILL GRAEF': 'NO KATHRYN GRILL GRAEFF',
  'YES KATHRYN GRILL GRAEF': 'YES KATHRYN GRILL GRAEFF',
  'SYLVESTER YOUNG, HI': 'SYLVESTER YOUNG III',
  'SYLVESTER YOUNG, ILI': 'SYLVESTER YOUNG III',
  'ANDREW MARTIN BATISTA': 'ANDREW MARTIN BATTISTA',
  'ANDREW MARTIN BATLISTA': 'ANDREW MARTIN BATTISTA',
  'BILLHENRY': 'BILL HENRY'
               }
md = ReplaceInCol(df = md,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )


CheckNames(new = md,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = md,
               merged = merged,
               officesToCheck = md["office"].unique(),
               LOG_FILE = LOG_FILE)
ReportDistricts(df = md, LOG_FILE = LOG_FILE)
md = AssignBaseOfficeCats(md)
ReportOfficeCats(new = md, LOG_FILE = LOG_FILE)
merged = MergeData(md, merged, VERBOSE)


################################################################################
# MI
################################################################################
DIR = "../state_data/MI/ready/"
mi = AutoStandardize(pd.read_csv(DIR+"mi_antrim.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Michigan Antrim county: manual')
mi = mi.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "jurisdiction": "township"
})
mi = mi.drop(["date"], axis = 1)
mi = mi.iloc[:, 1:]
mi["method"] = "MANUAL"
CheckNames(new = mi,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = mi,
               merged = merged,
               officesToCheck = mi["office"].unique(),
               LOG_FILE = LOG_FILE)
mi = AssignBaseOfficeCats(mi)
ReportOfficeCats(new = mi, LOG_FILE = LOG_FILE)
merged = MergeData(mi, merged, VERBOSE)


################################################################################
# MN
################################################################################
DIR = "../state_data/MN/ready/"
mn = AutoStandardize(pd.read_csv(DIR+"mn_clean.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Minnesota: statewide manual')
mn = mn.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference"
})
mn = mn.drop(["date"], axis = 1)
mn["method"] = "MANUAL"
CheckNames(new = mn,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = mn,
               merged = merged,
               officesToCheck = mn["office"].unique(),
               LOG_FILE = LOG_FILE)
mn = AssignBaseOfficeCats(mn)
ReportOfficeCats(new = mn, LOG_FILE = LOG_FILE)
merged = MergeData(mn, merged, VERBOSE)


################################################################################
# NC
################################################################################
DIR = "../state_data/NC/ready/"
nc = AutoStandardize(pd.read_csv(DIR+"North Carolina cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'North Carolina: statewide manual')
nc = nc.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference"
})
nc = nc.drop(["date"], axis = 1)
nc["method"] = "MANUAL"
replacements = {
                'HAWKINS': 'HOWIE HAWKINS',
                'BIDEN': 'JOSEPH R BIDEN',
                'BLANKENSHIP': 'DON BLANKENSHIP',
                'JORGENSEN': 'JO JORGENSEN',
                'TRUMP': 'DONALD J TRUMP'
               }
nc = ReplaceInCol(df = nc,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = nc,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = nc,
               merged = merged,
               officesToCheck = nc["office"].unique(),
               LOG_FILE = LOG_FILE)
nc = AssignBaseOfficeCats(nc)
ReportOfficeCats(new = nc, LOG_FILE = LOG_FILE)
merged = MergeData(nc, merged, VERBOSE)


################################################################################
# NH
################################################################################
DIR = "../state_data/NH/ready/"
nh = AutoStandardize(pd.read_csv(DIR+"NH-cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'New Hampshire: substate manual')
nh = nh.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "subset": "division"
})
nh = nh.drop(["date"], axis = 1)
nh["method"] = "MANUAL"
replacements = {
                'STATE REPRESENTATIVE': 'STATE HOUSE'
               }
nh = ReplaceInCol(df = nh,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
replacements = {
                '[WRITE-IN]': 'WRITE-IN'
               }
nh = ReplaceInCol(df = nh,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = nh,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = nh,
               merged = merged,
               officesToCheck = nh["office"].unique(),
               LOG_FILE = LOG_FILE)
nh = AssignBaseOfficeCats(nh)
ReportOfficeCats(new = nh, LOG_FILE = LOG_FILE)
merged = MergeData(nh, merged, VERBOSE)


################################################################################
# NJ
################################################################################
#Every county except Mercer
DIR = "../state_data/NJ/ready/"
nj = AutoStandardize(pd.read_csv(DIR+"nj_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'New Jersey: statewide manual')
nj = nj.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "differences": "difference",
    "type": "method"
})
#Enforce that candidate is just blank when we're talking about a whole ballot
nj.loc[nj.office == "ALL", "candidate"] = ''
#Let the subset be by the city, batch, and tabulator
nj = nj.fillna('')
nj["division"] = ''
nj.loc[nj.city != "", "division"] = nj.city
nj.loc[(nj.division != "") & ((nj.batch != "") | (nj.tabulator != "")), 
       "division"] = nj.division + "_"
nj.loc[nj.batch != "", "division"] = nj.division + nj.batch
nj.loc[(nj.division != "") & (nj.tabulator != ""), 
       "division"] = nj.division + "_"
nj.loc[nj.tabulator != "", "division"] = nj.division + nj.tabulator.map(str)
nj = nj.drop(["city", "batch", "tabulator"], axis=1)
replacements = {
                'PRESIDENT': 'US PRESIDENT',
                'SENATE': 'US SENATE',
                'HOUSE': 'US HOUSE',
                'FREEHOLDERS': 'BOARD OF CHOSEN FREEHOLDERS'
               }
nj = ReplaceInCol(df = nj,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
replacements = {
                'BIDEN/HARRIS': 'JOSEPH R BIDEN',
                'BILL HAMMONS ERIC BODENSTAB': 'BILL HAMMONS',
                'DE LA FUENTE/RICHARDSON': 'ROQUE "ROCKY" DE LA FUENTE',
                'DON BLANKENSHIP WILLIAM MOHR': 'DON BLANKENSHIP',
                'DONALD J TRUMP MICHAEL R PENCE': 'DONALD J TRUMP',
                'GLORIA ESTELA LA RIVA SUNIL FREEMAN': 'GLORIA LA RIVA',
                'HAMMONS/BODENSTAB': 'BILL HAMMONS',
                'HAWKINS/WALKER': 'HOWIE HAWKINS',
                'HOWIE HAWKINS ANGELA WALKER': 'HOWIE HAWKINS',
                'JO JORGENSEN SPIKE COHEN': 'JO JORGENSEN',
                'JOSEPH R BIDEN KAMALA D HARRIS': 'JOSEPH R BIDEN',
                'LA RIVA/FREEMAN': 'GLORIA LA RIVA',
                "ROQUE 'ROCKY' DE LA FUENTE DARCY G R":
                    'ROQUE "ROCKY" DE LA FUENTE',
                'TRUMP/PENCE': 'DONALD J TRUMP',
                'WRITE IN': 'WRITE-IN',
                'WRITEIN': 'WRITE-IN',
                'BOOKER': 'CORY BOOKER',
                'BRUKE': 'DANIEL BURKE',
                'BURKE': 'DANIEL BURKE',
                'FERNANDEZ': 'VERONICA FERNANDEZ',
                'HOFFMAN': 'MADELYN R HOFFMAN',
                'METHA': 'RIKIN "RIK" MEHTA',
                "RIKIN 'RIK' MEHTA": 'RIKIN "RIK" MEHTA',
                'EHRNSTORM': 'JESSE EHRNSTROM',
                'EHRNSTROM': 'JESSE EHRNSTROM',
                'HARVEY': 'JENNA HARVEY',
                'KENNEDY': 'AMY KENNEDY',
                'VAN DREW': 'JEFF VAN DREW'
               }
nj = ReplaceInCol(df = nj,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
#We find the text from Mercer County that Frank Palotta received a certain
# number of votes which was adjusted. That section declares that it was in the
# US SENATE race. That seems implausible on its face -- it discusses a mark on
# the ballot, which would presumably not be relevant if it were a write-in
# vote, and the total is too high to reasonably be a sum of write-in votes
# for a candidate who is actively contesting another office. So we proceed
# with the assumption that "US Senator" in that document is a typo of "US House"
nj.loc[(nj.candidate == 'FRANK PALOTTA') & (nj.office == 'US SENATE'),
       'office'] = 'US HOUSE'
CheckNames(new = nj,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = nj,
               merged = merged,
               officesToCheck = nj["office"].unique(),
               LOG_FILE = LOG_FILE)
nj = AssignBaseOfficeCats(nj)
nj.loc[nj.office == 'BOARD OF CHOSEN FREEHOLDERS', 'office_cat'] = 'COUNTY'
nj.loc[nj.office == 'CLERK', 'office_cat'] = 'SERVICES'
ReportOfficeCats(new = nj, LOG_FILE = LOG_FILE)
merged = MergeData(nj, merged, VERBOSE)

#Then Mercer
nj = AutoStandardize(pd.read_csv(DIR+"mercer_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'New Jersey: Mercer manual')
nj.loc[nj.office == 'US HOUSE OF REPRESENTATIVES 12TH', 'district'] = 12
nj.loc[nj.office == 'US HOUSE OF REPRESENTATIVES 4TH', 'district'] = 4
replacements = {
                'US HOUSE OF REPRESENTATIVES 4TH': 'US HOUSE',
                'US HOUSE OF REPRESENTATIVES 12TH': 'US HOUSE',
                'PRESIDENT': 'US PRESIDENT'
               }
nj = ReplaceInCol(df = nj,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
nj.candidate = nj.candidate.str.strip()
nj.candidate = nj.candidate.str.replace('  ',' ')
nj.candidate = nj.candidate.str.replace(r'""',r'"')
replacements = {
                'ARMEN SIMONIAN': 'WRITE-IN',
                'PETE BUTTIGIEG': 'WRITE-IN',
                'BRENNAN T DALE': 'WRITE-IN',
                'BRIAN CARROL/AMAR PATEL': 'BRIAN CARROL',
                'GLORIA ESTELA LA RIVA': 'GLORIA LA RIVA',
                'JESUS CHRIST': 'WRITE-IN',
                'MICHAEL BENNET': 'WRITE-IN',
                'ROQUE DE LA FUENTE': 'ROQUE "ROCKY" DE LA FUENTE',
                'ROQUE DE LE FUENTE': 'ROQUE "ROCKY" DE LA FUENTE',
                'BRUAN BOCCANFUSO': 'BRYAN BOCCANFUSO',
                'BONNIE KONEY': 'WRITE-IN',
                'BONNIE KOVEY': 'WRITE-IN',
                'IMRAN JEMAL': 'WRITE-IN'
               }
nj = ReplaceInCol(df = nj,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = nj,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = nj,
               merged = merged,
               officesToCheck = nj["office"].unique(),
               LOG_FILE = LOG_FILE)
nj = AssignBaseOfficeCats(nj)
nj.loc[nj.office == 'BOARD OF CHOSEN FREEHOLDERS', 'office_cat'] = 'SERVICES'
nj.loc[nj.office == 'COUNTY CLERK', 'office_cat'] = 'SERVICES'
ReportOfficeCats(new = nj, LOG_FILE = LOG_FILE)
merged = MergeData(nj, merged, VERBOSE)


################################################################################
# NM
################################################################################
DIR = "../state_data/NM/ready/"
nm = AutoStandardize(pd.read_csv(DIR+"NM Recleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'New Mexico: statewide RLA')
nm = nm.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference"
})
nm = nm.drop(["date"], axis = 1)
nm = nm.iloc[:, 1:]
nm["method"] = "RLA"
nm.loc[nm.office.str.contains('DISTRICT 2'), 'district'] = 2
nm.office = nm.office.str.strip()
replacements = {
                'USSENATOR': 'US SENATE',
                'NMJUDGE OF THE COURT OF APPEALS-POSITION3':
                    'JUDGE OF THE COURT OF APPEALS POSITION 3',
                'US SENATOR': 'US SENATE',
                'USREPRESENTATIVE-DISTRICT 2': 'US HOUSE',
                'US REPRESENTATIVE - DISTRICT 2': 'US HOUSE',
                'NM JUDGE OF THE COURT OF APPEALS - POSITION 3':
                    'JUDGE OF THE COURT OF APPEALS POSITION 3',
                'NM JUDGE OF THE COURT OF APPEALS- POSITION 3':
                    'JUDGE OF THE COURT OF APPEALS POSITION 3',
                'NMJUDGE OF THE COURT OF APPEALS-POSITION 3-DISTRICT 2':
                    'JUDGE OF THE COURT OF APPEALS POSITION 3',
                'US PRESIDENT AND VICE-PRESIDENT': 'US PRESIDENT'
               }
nm = ReplaceInCol(df = nm,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
nm.candidate = nm.candidate.str.strip()
replacements = {
                'MARK VRONCHETTI': 'MARK V RONCHETTI',
                'JANE BYOHALEM': 'JANE B YOHALEM',
                'THOMAS CMONTOYA': 'THOMAS C MONTOYA',
                'XOCHITEL TORRES SMALL': 'XOCHITL TORRES SMALL'
               }
nm = ReplaceInCol(df = nm,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = nm,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = nm,
               merged = merged,
               officesToCheck = nm["office"].unique(),
               LOG_FILE = LOG_FILE)
nm = AssignBaseOfficeCats(nm)
ReportOfficeCats(new = nm, LOG_FILE = LOG_FILE)
merged = MergeData(nm, merged, VERBOSE)


################################################################################
# OH statewide
################################################################################
DIR = "../state_data/OH/ready/"
oh = AutoStandardize(pd.read_csv(DIR+"oh_statewide_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Ohio: statewide manual')
oh = SplitColByStr(df = oh,
                   oldColName = 'office',
                   newColName = 'district',
                   theString = 'DISTRICT (.*)'
                   )
oh.office = oh.office.str.replace('( - DISTRICT .*)','',regex=True)
replacements = {
                'STATE REPRESENTATIVE': 'STATE HOUSE',
                'REPRESENTATIVE TO CONGRESS': 'US HOUSE',
                'STATE SENATOR': 'STATE SENATE'
               }
oh = ReplaceInCol(df = oh,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
CheckNames(new = oh,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckOfficeNames(oh, LOG_FILE)
oh = AssignBaseOfficeCats(oh)
cats = {
        'COUNTY RECORDER': 'SERVICES',
        'CORONER': 'SERVICES',
        'COUNTY TREASURER': 'SERVICES'
       }
for k in cats.keys():
    oh.loc[oh.office == k, 'office_cat'] = cats[k]
oh.loc[oh.office.str.contains('TAX LEVY'), 'office_cat'] = 'SERVICES'
ReportOfficeCats(new = oh, LOG_FILE = LOG_FILE)
merged = MergeData(oh, merged, VERBOSE)


################################################################################
# OH RLAs
################################################################################
DIR = "../state_data/OH/ready/"
oh = AutoStandardize(pd.read_csv(DIR+"oh_rlas_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Ohio: county RLAs')
CheckNames(new = oh,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = oh,
               merged = merged,
               officesToCheck = oh["office"].unique(),
               LOG_FILE = LOG_FILE)
oh = AssignBaseOfficeCats(oh)
ReportOfficeCats(new = oh, LOG_FILE = LOG_FILE)
merged = MergeData(oh, merged, VERBOSE)


################################################################################
# OR
################################################################################
DIR = "../state_data/OR/ready/"
ore = AutoStandardize(pd.read_csv(DIR+"or_audit.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Oregon: statewide manual')
ore = ore.rename(columns = {
    "batch": "division",
})
ore["method"] = "MANUAL"
replacements = {
                'WRITE INS': 'WRITE-IN',
                'FAGEN': 'SHEMIA FAGAN',
                'THATCHER': 'KIM THATCHER',
                'MARKLEY': 'KYLE MARKLEY',
                'NATHALIE PRAVICINI': 'NATHALIE PARAVICINI',
                'PARAVICINI': 'NATHALIE PARAVICINI',
                'WRITEIN': 'WRITE-IN',
                'OVERVOTE': 'OVERVOTES',
                'UNDERVOTE': 'UNDERVOTES',
                'CROSS': 'MICHAEL CROSS',
                'ROSENBLUM': 'ELLEN ROSENBLUM',
                'HEDBOR': 'LARS D H HEDBOR',

               }
ore = ReplaceInCol(df = ore,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = ore,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ore,
               merged = merged,
               officesToCheck = ore["office"].unique(),
               LOG_FILE = LOG_FILE)
ore = AssignBaseOfficeCats(ore)
cats = {
        'SECRETARY OF STATE': 'SERVICES',
        'STATE TREASURER': 'SERVICES',
        'CITY OF GOLD BEACH COUNCIL POSITION 2': 'CITY'
       }
for k in cats.keys():
    ore.loc[ore.office == k, 'office_cat'] = cats[k]
ReportOfficeCats(new = ore, LOG_FILE = LOG_FILE)
merged = MergeData(ore, merged, VERBOSE)


################################################################################
# RI
################################################################################
DIR = "../state_data/RI/ready/"
ri = AutoStandardize(pd.read_csv(DIR+"RI-cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Rhode Island: statewide RLA')
#The county column does not really hold counties -- let's just call it a 
# division
ri = ri.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "county": "division"
})
#There is a problem: the difference column is taken from a "net change" column
# rather than being the candidate-level differences
ri["difference"] = ri.audited - ri.original
ri["method"] = "RLA"
ri = ri.drop(["date"], axis = 1)
replacements = {
                'BIDEN': 'JOSEPH R BIDEN',
                'TRUMP': 'DONALD J TRUMP'
               }
ri = ReplaceInCol(df = ri,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
CheckNames(new = ri,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = ri,
               merged = merged,
               officesToCheck = ri["office"].unique(),
               LOG_FILE = LOG_FILE)
ri = AssignBaseOfficeCats(ri)
ReportOfficeCats(new = ri, LOG_FILE = LOG_FILE)
merged = MergeData(ri, merged, VERBOSE)


################################################################################
# SC
################################################################################
DIR = "../state_data/SC/ready/"
sc = AutoStandardize(pd.read_csv(DIR+"sc_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'South Carolina')
CheckNames(new = sc,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = sc,
               merged = merged,
               officesToCheck = sc["office"].unique(),
               LOG_FILE = LOG_FILE)
sc = AssignBaseOfficeCats(sc)
cats = {
        'AUDITOR': 'SERVICES',
        'CORONER': 'SERVICES',
        'COUNTY COUNCIL': 'COUNTY',
       }
for k in cats.keys():
    sc.loc[sc.office == k, 'office_cat'] = cats[k]
ReportOfficeCats(new = sc, LOG_FILE = LOG_FILE)
merged = MergeData(sc, merged, VERBOSE)


################################################################################
# TX: Forensic audits
#   The only county we have, Tarrant, is not even a count of the number of 
#   problems with ballots -- it is just counting the number of ballots versus 
#   the number of recorded check-ins
################################################################################


################################################################################
# UT 
################################################################################
DIR = "../state_data/UT/ready/"
ut = AutoStandardize(pd.read_csv(DIR+"ut_submitted.txt"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Utah: statewide manual')
ut = ut.rename(columns = {
    "audited_votes": "ballots",
    "diff": "issues"
})
ut = ut.drop("total_votes", axis = 1)
ut["method"] = "MANUAL"
ut["state"] = "UTAH"
CheckNames(new = ut,
           merged = merged,
           LOG_FILE = LOG_FILE)
merged = MergeData(ut, merged, VERBOSE)

WrapState(LOG_FILE, 'Utah manual')


################################################################################
# VA
#   Only the probability of an incorrect call was reported, ballots were not
#   directly compared
################################################################################


################################################################################
# VT 
################################################################################
DIR = "../state_data/VT/ready/"
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Vermont: statewide mixed')
vt = AutoStandardize(pd.read_csv(DIR+"Brandon_cleaned.csv"))
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+"Pownal_cleaned.csv"))])
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+"Randolph_cleaned.csv"))])
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+\
                                    "South_Burlington_cleaned.csv"))])
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+"Topsham_cleaned.csv"))])
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+"Warren_cleaned.csv"))])
vt = pd.concat([vt, AutoStandardize(pd.read_csv(DIR+"Worcester_cleaned.csv"))])
vt = vt.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
    "town": "township"
})
vt = vt.drop("date", axis = 1)
vt["original"] = vt["original"].astype(str)
vt["original"]  = vt["original"].str.replace(",", "", regex=True)
vt["original"] = vt["original"].astype(int)
vt = vt.iloc[:, 1:]
replacements = {
                'US REPRESENTATIVE': 'US HOUSE',
                'STATE SENATOR': 'STATE SENATE',
                'STATE REPRESENTATIVE': 'STATE HOUSE'
               }
vt = ReplaceInCol(df = vt,
                  replacements = replacements,
                  idCol = 'office',
                  targetCol = 'office'
                 )
#The number of US PRESIDENT cands is real: 21 official cands per sample ballots
replacements = {
                'TOTAL WRITE IN COUNTS': 'WRITE-IN',
                'GLORIA LARIVA': 'GLORIA LA RIVA'
               }
vt = ReplaceInCol(df = vt,
                  replacements = replacements,
                  idCol = 'candidate',
                  targetCol = 'candidate'
                 )
vt['method'] = 'MACHINE'
vt.loc[vt.township == 'TOPSHAM', 'method'] = 'MANUAL'
CheckNames(new = vt,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = vt,
               merged = merged,
               officesToCheck = vt["office"].unique(),
               LOG_FILE = LOG_FILE)
vt = AssignBaseOfficeCats(vt)
cats = {
        'LIEUTENANT GOVERNOR': 'GOVERNOR',
        'STATE TREASURER': 'SERVICES',
        'SECRETARY OF STATE': 'SERVICES',
        'AUDITOR OF ACCOUNTS': 'SERVICES',
        'HIGH BAILIFF': 'JUDICIAL'
       }
for k in cats.keys():
    vt.loc[vt.office == k, 'office_cat'] = cats[k]
ReportOfficeCats(new = vt, LOG_FILE = LOG_FILE)
merged = MergeData(vt, merged, VERBOSE)


################################################################################
# WA 
################################################################################
DIR = "../state_data/WA/ready/"
#King county
wa = AutoStandardize(pd.read_csv(DIR+"king_cleaned.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Washington: King county manual')
wa = wa.rename(columns = {
    "original_votes": "original",
    "audited_votes": "audited",
    "diff": "difference",
})
#Drop the word precinct from the precinct column
wa["precinct"] = wa["precinct"].str.replace(" PRECINCT","",regex=True)
wa["district"] = ""
#In cases where it's actually a legislative direct, move it to that column
wa.loc[wa.precinct.str.contains("LEGISLATIVE DISTRICT"), "district"] = 5
wa.loc[wa.precinct.str.contains("LEGISLATIVE DISTRICT"), "precinct"] = ""
wa = wa.drop(["date", "recount_type"], axis = 1)
wa["method"] = "MANUAL"
wa.candidate = wa.candidate.str.strip()
wa.loc[wa.candidate == 'WRITE-INS', 'candidate'] = 'WRITE-IN'
wa.loc[wa.candidate == 'GLORIA LARIVA', 'candidate'] = 'GLORIA LA RIVA'
CheckNames(new = wa,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = wa,
               merged = merged,
               officesToCheck = wa["office"].unique(),
               LOG_FILE = LOG_FILE)
wa = AssignBaseOfficeCats(wa)
ReportOfficeCats(new = wa, LOG_FILE = LOG_FILE)
merged = MergeData(wa, merged, VERBOSE)

#Pierce county
wa = AutoStandardize(pd.read_csv(DIR+"pierce.csv"))
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'Washington: Pierce county manual')
CheckNames(new = wa,
           merged = merged,
           LOG_FILE = LOG_FILE)
CheckStandards(new = wa,
               merged = merged,
               officesToCheck = wa["office"].unique(),
               LOG_FILE = LOG_FILE)
wa = AssignBaseOfficeCats(wa)
ReportOfficeCats(new = wa, LOG_FILE = LOG_FILE)
merged = MergeData(wa, merged, VERBOSE)


################################################################################
# Overall cleaning tasks
################################################################################
merged.candidate = merged.candidate.str.strip()

merged.loc[merged.candidate == 'WRITEIN', 'candidate'] = 'WRITE-IN'


################################################################################
# PARTY LABELS
#
# The following offices will receive party labels:
#   US PRESIDENT
#   US SENATE
#   US HOUSE
#   GOVERNOR
#   STATE SENATE
#   STATE HOUSE
################################################################################
DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'US PRESIDENT Party labels')
#US PRESIDENT: Fix remaining typoes and standardize names
fixes = {
         'JO JORGENSON': 'JO JORGENSEN',
         'ROCQUE DE LA FUENTE': 'ROQUE "ROCKY" DE LA FUENTE',
         'ROQUE DE LA FUENTE': 'ROQUE "ROCKY" DE LA FUENTE',
         "ROQUE 'ROCKY' DE LA FUENTE": 'ROQUE "ROCKY" DE LA FUENTE',
         'ROCKY DE LA FUENTE': 'ROQUE "ROCKY" DE LA FUENTE',
         'BLANKS': 'UNDERVOTES',
         'BLANK VOTES': 'UNDERVOTES',
         'DON BLAKENSHIP': 'DON BLANKENSHIP',
         'BRIAN CARROL': 'BRIAN CARROLL',
         'BERNIE SANDERS': 'WRITE-IN',
         'OTHERS': 'OTHER'
        }
for f in fixes.keys():
    merged.loc[merged.candidate == f, 'candidate'] = fixes[f]
party_map = {
             'JOSEPH R BIDEN': 'DEMOCRAT',
             'DONALD J TRUMP': 'REPUBLICAN',
             'JO JORGENSEN': 'LIBERTARIAN',
             'ALYSON KENNEDY': 'OTHER',
             'BILL HAMMONS': 'OTHER',
             'BLAKE HUBER': 'OTHER',
             'BRIAN CARROLL': 'OTHER',
             'BROCK PIERCE': 'OTHER',
             'C L GAMMON': 'OTHER',
             'CHRISTOPHER LAFONTAINE': 'OTHER',
             'DARIO HUNTER': 'OTHER',
             'DON BLANKENSHIP': 'OTHER',
             'GARY SWING': 'OTHER',
             'GLORIA LA RIVA': 'OTHER',
             'H BROOKE PAIGE': 'OTHER',
             'HOWIE HAWKINS': 'OTHER',
             'JEROME SEGAL': 'OTHER',
             'JESSE VENTURA': 'OTHER',
             'JOE MCHUGH': 'OTHER',
             'JOHN MYERS': 'OTHER',
             "JORDAN 'CANCER' SCOTT": 'OTHER',
             'JOSEPH KISHORE': 'OTHER',
             'KANYE WEST': 'OTHER',
             'KEITH MCCORMIC': 'OTHER',
             'KYLE KENLEY KOPITKE': 'OTHER',
             'MARK CHARLES': 'OTHER',
             'OTHERS': 'OTHER',
             'PHIL COLLINS': 'OTHER',
             'PRINCESS KHADIJAH MARYAM JACOB-FAMBRO': 'OTHER',
             'RICHARD DUNCAN': 'OTHER',
             'ROQUE "ROCKY" DE LA FUENTE': 'OTHER',
             'SHEILA TITTLE': 'OTHER',
             'ZACHARY SCALF': 'OTHER'
            }
for c in party_map.keys():
    merged.loc[merged.candidate == c, 'party'] = party_map[c]
ReportParties(merged, 'US PRESIDENT', LOG_FILE)

DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'US SENATE Party labels')
fixes = {
         "O'DONNELL": "JUSTIN O'DONNELL",
         'SHAHEEN': 'JEANNE SHAHEEN',
         'MESSNER': 'BRYANT "CORKY" MESSNER',
         'RIKIN MEHTA': 'RIKIN "RIK" MEHTA',
         'UNDER VOTE': 'UNDERVOTES'
        }
for f in fixes.keys():
    merged.loc[(merged.candidate == f) &
               (merged.office == 'US SENATE'),
               'candidate'] = fixes[f]
party_map = {
             'JOHN WAYNE HOWE': 'OTHER',
             'DANIEL DOYLE': 'OTHER',
             "STEPHAN 'SEKU' EVANS": 'OTHER',
             "JUSTIN O'DONNELL": 'LIBERTARIAN',
             'JEANNE SHAHEEN': 'DEMOCRAT',
             'BRYANT "CORKY" MESSNER': 'REPUBLICAN',
             'CORY BOOKER': 'DEMOCRAT',
             'RIKIN "RIK" MEHTA': 'REPUBLICAN',
             'VERONICA FERNANDEZ': 'OTHER',
             'DANIEL BURKE': 'OTHER',
             'MADELYN R HOFFMAN': 'OTHER',
             'BOB WALSH': 'LIBERTARIAN',
             'MARK V RONCHETTI': 'REPUBLICAN',
             'BEN R LUJAN': 'DEMOCRAT',
             'IBRAHIM A TAHER': 'OTHER',
             'GARY DYE': 'LIBERTARIAN',
             'JO RAE PERKINS': 'REPUBLICAN',
             'JEFF MERKLEY': 'DEMOCRAT',
             'BILL BLEDSOE': 'OTHER',
             'LINDSEY GRAHAM': 'REPUBLICAN',
             'JAIME HARRISON': 'DEMOCRAT'
            }
for c in party_map.keys():
    merged.loc[merged.candidate == c, 'party'] = party_map[c]
ReportParties(merged, 'US SENATE', LOG_FILE)

DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'US HOUSE Party labels')
fixes = {
         'HUFFMAN': 'JARED HUFFMAN',
         'MENSING': 'DALE K MENSING',
         'ALL OTHERS': 'OTHERS',
         'CHRISTTNE Y QUINN': 'CHRISTINE Y QUINN',
         'STEVE JONES': 'WRITE-IN'
        }
for f in fixes.keys():
    merged.loc[(merged.candidate == f) &
               (merged.office == 'US HOUSE'),
               'candidate'] = fixes[f]
party_map = {
             'JARED HUFFMAN': 'DEMOCRAT',
             'DALE K MENSING': 'REPUBLICAN',
             'BRYNNE KENNEDY': 'DEMOCRAT',
             'TOM MCCLINTOCK': 'REPUBLICAN',
             'DOUG LA MALFA': 'REPUBLICAN',
             'AUDREY DENNEY': 'DEMOCRAT',
             'SALUD CARBAJAL': 'DEMOCRAT',
             'ANDY CALDWELL': 'REPUBLICAN',
             'NEAL DUNN' : 'REPUBLICAN',
             'DANIEL WEBSTER': 'REPUBLICAN',
             'DANA MARIE COTTRELL': 'DEMOCRAT',
             'BRIAN MAST': 'REPUBLICAN',
             'PAM KEITH': 'DEMOCRAT',
             'K W MILLER': 'REPUBLICAN',
             'FIONA MCFARLAND': 'REPUBLICAN',
             'DRAKE BUCKMAN': 'DEMOCRAT',
             'AL LAWSON': 'DEMOCRAT',
             'GARY ADLER': 'REPUBLICAN',
             'KATHY CASTOR': 'DEMOCRAT',
             'CHRISTINE Y QUINN': 'REPUBLICAN',
             'SCOTT FRANKLIN': 'REPUBLICAN',
             'ALAN COHN': 'DEMOCRAT',
             'GUS MICHAEL BILIRAKIS': 'REPUBLICAN',
             'KIMBERLY WALKER': 'REPUBLICAN',
             'VERN BUCHANAN': 'REPUBLICAN',
             'MARGARET GOOD': 'DEMOCRAT',
             'KAT CAMMACK': 'REPUBLICAN',
             'ADAM CHRISTENSEN': 'DEMOCRAT',
             'JAMES P MCGOVERN': 'DEMOCRAT',
             'TRACY LYN LOVVORN': 'REPUBLICAN',
             'JAKE AUCHINCLOSS': 'DEMOCRAT',
             'JULIE A HALL': 'REPUBLICAN',
             'KATHERINE CLARK': 'DEMOCRAT',
             'CAROLINE COLARUSSO': 'REPUBLICAN',
             'SETH MOULTON': 'DEMOCRAT',
             'JOHN PAUL MORAN': 'REPUBLICAN',
             'AYANNA PRESSLEY': 'DEMOCRAT',
             'ROY A OWENS, SR': 'OTHER',
             'STEPHEN F LYNCH': 'DEMOCRAT',
             'JONATHAN D LOTT': 'OTHER',
             'BILL KEATING': 'DEMOCRAT',
             'HELEN BRADY': 'REPUBLICAN',
             'MICHAEL MANLEY': 'OTHER',
             'FRANK PALOTTA': 'DEMOCRAT',
             'AMY KENNEDY': 'DEMOCRAT',
             'JEFF VAN DREW': 'REPUBLICAN',
             'JENNA HARVEY': 'OTHER',
             'JESSE EHRNSTROM': 'LIBERTARIAN',
             'CHRISTOPHER H SMITH': 'REPUBLICAN',
             'STEPHANIE SCHMID': 'DEMOCRAT',
             'MICHAEL J RUFO': 'LIBERTARIAN',
             'ANDREW PACHUTA': 'REPUBLICAN',
             'HANK SCHROEDER': 'OTHER',
             'MARK RAZZOLI': 'REPUBLICAN',
             'BONNIE WATSON COLEMAN': 'DEMOCRAT',
             'EDWARD FORCHION': 'OTHER',
             'KENNETH J CODY': 'OTHER',
             'XOCHITL TORRES SMALL': 'DEMOCRAT',
             'YVETTE HERRELL': 'REPUBLICAN',
             'PETER R BECKER': 'OTHER',
             'MIRIAM BERRY': 'REPUBLICAN',
             'CHRISTOPHER HELALI': 'OTHER',
             'MARCIA HORNE': 'OTHER',
             'SHAWN ORR': 'OTHER',
             'JERRY TRUDELL': 'OTHER',
             'PETER WELCH': 'DEMOCRAT',
             'MICHAEL MANLEY': 'OTHER',
             'CHARLES ANTHONY': 'REPUBLICAN',
             'JOHN SARBANES': 'DEMOCRAT',
             'JOHNNY RAY SALLING': 'REPUBLICAN',
             'C A DUTCH RUPPERSBERGER': 'DEMOCRAT',
             'GEORGE E MCDERMOTT': 'REPUBLICAN',
             'ANTHONY G BROWN': 'DEMOCRAT',
             'CHRIS PALOMBI': 'REPUBLICAN',
             'STENY H HOYER': 'DEMOCRAT',
             'ANDY HARRIS': 'REPUBLICAN',
             'MIA MASON': 'DEMOCRAT',
             'GREGORY THOMAS COLL': 'REPUBLICAN',
             'JAMIE RASKIN': 'DEMOCRAT',
             'NEIL C PARROTT': 'REPUBLICAN',
             'DAVID J TRONE': 'DEMOCRAT',
             'GEORGE GLUCK': 'OTHER',
             'KIMBERLY KLACIK': 'REPUBLICAN',
             'KWEISI MFUME': 'DEMOCRAT',
             'MARK HACKETT': 'OTHER',
             'JIM CLYBURN': 'DEMOCRAT',
             'JOHN MCCOLLUM': 'REPUBLICAN'
            }
for c in party_map.keys():
    merged.loc[(merged.candidate == c) &
               (merged.office == 'US HOUSE'),
               'party'] = party_map[c]
merged.loc[(merged.office == 'US HOUSE') & (merged.party == 'NONPARTISAN'),
           'party'] = 'OTHER'
ReportParties(merged, 'US HOUSE', LOG_FILE)


DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'GOVERNOR Party labels')
fixes = {
         'PERRY': 'DARRYL PERRY',
         'FELTES': 'DAN FELTES',
         'SUNUNU': 'CHRIS SUNUNU'
        }
for f in fixes.keys():
    merged.loc[(merged.candidate == f) &
               (merged.office == 'GOVERNOR'),
               'candidate'] = fixes[f]
party_map = {
             'DARRYL PERRY': 'LIBERTARIAN',
             'DAN FELTES': 'DEMOCRAT',
             'CHRIS SUNUNU': 'REPUBLICAN',
             'WAYNE BILLADO III': 'OTHER',
             'MICHAEL A DEVOST': 'OTHER',
             'CHARLY DICKERSON': 'OTHER',
             'KEVIN HOYT': 'OTHER',
             'EMILY PEYTON': 'OTHER',
             'PHIL SCOTT': 'REPUBLICAN',
             'ERYNN HAZLETT WHITNEY': 'OTHER',
             'DAVID ZUCKERMAN': 'DEMOCRAT'
            }
for c in party_map.keys():
    merged.loc[(merged.candidate == c) &
               (merged.office == 'GOVERNOR'),
               'party'] = party_map[c]
ReportParties(merged, 'GOVERNOR', LOG_FILE)


DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'STATE SENATE Party labels')
fixes = {
         'S MONIQUE LIMON': 'MONIQUE LIMON',
         'CHRISTTNA PAYLAN': 'CHRISTINA PAYLAN',
         'ALL OTHERS': 'OTHERS'
        }
for f in fixes.keys():
    merged.loc[(merged.candidate == f) &
               (merged.office == 'STATE SENATE'),
               'candidate'] = fixes[f]
party_map = {
             'GREG MADDEN': 'OTHER',
             'PAMELA DAWN SWARTZ': 'DEMOCRAT',
             'BRIAN DAHLE': 'REPUBLICAN',
             'MONIQUE LIMON': 'DEMOCRAT',
             'GARY J MICHAELS': 'REPUBLICAN',
             'JENNIFER BRADLEY': 'REPUBLICAN',
             'MELINA RAYNA BARRAN': 'DEMOCRAT',
             'DARRYL ERVIN ROUSON': 'DEMOCRAT',
             'CHRISTINA PAYLAN': 'REPUBLICAN',
             'KATHY LEWIS': 'DEMOCRAT',
             'DANNY BURGESS': 'REPUBLICAN',
             'JIM BOYD': 'REPUBLICAN',
             'ANTHONY "TONY" ELDON': 'DEMOCRAT',
             'LORANNE AUSLEY': 'DEMOCRAT',
             'MARVA HARRIS PRESTON': 'DEMOCRAT',
             'GAYLE HARRELL': 'REPUBLICAN',
             'CORINNA BALDERRAMOS ROBINSON': 'DEMOCRAT',
             'JOHN VELIS': 'DEMOCRAT',
             'JOHN FRANCIS CAIN': 'REPUBLICAN',
             'REBECCA L RAUSCH': 'DEMOCRAT',
             'MATTHEW T KELLY': 'REPUBLICAN',
             'SUSAN LYNN MORAN': 'DEMOCRAT',
             'JAMES R MCMAHON, III': 'REPUBLICAN',
             "PATRICK M O'CONNOR": 'REPUBLICAN',
             'MEG WHEELER': 'DEMOCRAT',
             'ANNE M GOBI': 'DEMOCRAT',
             'STEVEN R HALL': 'REPUBLICAN',
             'DEAN A TRAN': 'REPUBLICAN',
             'JOHN J CRONIN': 'DEMOCRAT',
             'RYAN C FATTMAN': 'REPUBLICAN',
             'CHRISTINE CREAN': 'DEMOCRAT',
             'DICK ANDERSON': 'DEMOCRAT',
             'SHAULEEN HIGGINS': 'OTHER',
             'MELISSA T CRIBBINS': 'DEMOCRAT',
             'BRITTANY D CAVACAS': 'OTHER',
             'BRIAN "BC" COLLAMORE': 'REPUBLICAN',
             'LARRY COURCELLE': 'DEMOCRAT',
             'GREG COX': 'DEMOCRAT',
             'CHERYL M HOOKER': 'DEMOCRAT',
             'CASEY JENNINGS': 'OTHER',
             'RICHARD "SENSEI" LENCHUS': 'OTHER',
             'MICHAEL SHANK': 'OTHER',
             'JOSHUA C TERENZINI': 'REPUBLICAN',
             'TERRY K WILLIAMS': 'REPUBLICAN',
             'BRIAN CAMPION': 'DEMOCRAT',
             'MICHAEL "MIKE" HALL': 'REPUBLICAN',
             'MEG HANSEN': 'REPUBLICAN',
             'CASEY JENNINGS': 'OTHER',
             'RICHARD "SENSEI" LENCHUS': 'OTHER',
             'MICHAEL SHANK': 'OTHER',
             'KEVIN HOYT': 'OTHER',
             'DICK SEARS': 'DEMOCRAT',
             'BILL T HUFF': 'REPUBLICAN',
             'MARK A MACDONALD': 'DEMOCRAT',
             'PHIL BARUTH': 'DEMOCRAT',
             'SUSAN BOWEN': 'REPUBLICAN',
             'TOM CHASTENAY': 'REPUBLICAN',
             'THOMAS CHITTENDEN': 'DEMOCRAT',
             'JAMES EHLERS': 'DEMOCRAT',
             'KYLIE HOLLINGSWORTH': 'REPUBLICAN',
             'KUMULIA "KASE" LONG': 'REPUBLICAN',
             'VIRGINIA "GINNY" LYONS': 'DEMOCRAT',
             'CHRISTOPHER PEARSON': 'DEMOCRAT',
             'KESHA RAM': 'DEMOCRAT',
             'ERICKA REDIC': 'REPUBLICAN',
             'DEAN ROLLAND': 'REPUBLICAN',
             'MICHAEL SIROTKIN': 'DEMOCRAT',
             'JOE BENNING': 'REPUBLICAN',
             'MATTHEW CHOATE': 'DEMOCRAT',
             'JT DODGE': 'REPUBLICAN',
             'JANE KITCHEL': 'DEMOCRAT',
             'CHARLES W WILSON': 'REPUBLICAN',
             'KEN ALGER': 'REPUBLICAN',
             'ANN CUMMINGS': 'DEMOCRAT',
             'ANDREW PERCHLIK': 'DEMOCRAT',
             'ANTHONY POLLINA': 'DEMOCRAT',
             'DAWNMARIE TOMASI': 'REPUBLICAN',
             'DWAYNE TUCKER': 'REPUBLICAN',
             'PAUL VALLERAND': 'OTHER',
             'JOSH KIMBRELL': 'REPUBLICAN',
             'GLENN REESE': 'DEMOCRAT',
             'RONNIE CRAMER': 'REPUBLICAN',
             'CHRISTOPHER THIBAULT': 'DEMOCRAT',
             'RODNEY BUNCUM': 'REPUBLICAN',
             'MARGIE BRIGHT MATTHEWS': 'DEMOCRAT',
             'TOM CONNOR': 'REPUBLICAN',
             'VERNON STEPHENS': 'DEMOCRAT',
             'MIKE FANNING': 'DEMOCRAT',
             'ERIN MOSLEY': 'REPUBLICAN',
             'VINCENT SHEHEEN': 'DEMOCRAT',
             'PENRY GUSTAFSON': 'REPUBLICAN',
             'BRIAN ADAMS': 'REPUBLICAN',
             'DEBBIE CHATMAN BRYANT': 'DEMOCRAT'
            }
for c in party_map.keys():
    merged.loc[(merged.candidate == c) &
               (merged.office == 'STATE SENATE'),
               'party'] = party_map[c]
merged.loc[(merged.party == 'INDEPENDENT') &
           ((merged.office == 'STATE SENATE') |
            (merged.office == 'STATE HOUSE')
           ), 'party'] = 'OTHER'
ReportParties(merged, 'STATE SENATE', LOG_FILE)


DeclareState(LOG_FILE = LOG_FILE,
             BIG_SEP = BIG_SEP,
             state = 'STATE HOUSE Party labels')
fixes = {
         'SVOLOS': 'CHARLOTTE SVOLOS',
         'WOOD': 'JIM WOOD',
         'ALCIA BAYS': 'ALICIA BAYS',
         'ALBERT M GRIFITHS': 'ALBERT M GRIFFITHS',
         'ALL OTHERS': 'OTHERS',
         'ST LAURENT': 'KRISTI ST LAURENT',
         'AZIBERT': 'HENRI AZIBERT',
         'ROMAN': 'VALERIE ROMAN',
         'SINGUREANU': 'IOANA SINGUREANU',
         'SOTI': 'JULIUS SOTI',
         'LYNN': 'ROBERT J LYNN',
         'MCMAHON': 'CHARLES MCMAHON',
         'GRIFFIN': 'MARY GRIFFIN'
        }
for f in fixes.keys():
    merged.loc[(merged.candidate == f) &
               (merged.office == 'STATE HOUSE'),
               'candidate'] = fixes[f]
party_map = {
             'TIMOTHY R HUIT': 'OTHER',
             'DAVID W NEES': 'OTHER',
             'CHARLOTTE SVOLOS': 'REPUBLICAN',
             'JIM WOOD': 'DEMOCRAT',
             'FRANK BIGELOW': 'REPUBLICAN',
             'ELIZABETH BETANCOURT': 'DEMOCRAT',
             'MEGAN DAHLE': 'REPUBLICAN',
             'STEVE BENNETT': 'DEMOCRAT',
             'CHARLES W COLE': 'REPUBLICAN',
             'JORDAN CUNNINGHAM': 'REPUBLICAN',
             'DAWN ADDIS': 'DEMOCRAT',
             'CORD BYRD': 'REPUBLICAN',
             'JOSHUA HICKS': 'DEMOCRAT',
             'JASON SHOAF': 'REPUBLICAN',
             'TAYMOUR KHAN': 'DEMOCRAT',
             'JAYER WILLIAMSON': 'REPUBLICAN',
             'ANGELA L HOOVER': 'DEMOCRAT',
             'JAY TRUMBULL': 'DEMOCRAT',
             'ALICIA BAYS': 'DEMOCRAT',
             'CHUCK BRANNAN': 'REPUBLICAN',
             'ROCK ABOUJAOUDE JR': 'DEMOCRAT',
             'JACKIE TOLEDO': 'REPUBLICAN',
             'JULIE JENKINS': 'DEMOCRAT',
             'SUSAN L VALDES': 'DEMOCRAT',
             'ANGEL S URBINA CAPO': 'REPUBLICAN',
             'LAURIE RODRIGUEZ-PERSON': 'OTHER',
             'LAWRENCE MCCLURE': 'REPUBLICAN',
             'CLEO L "CL" TOWNSEND JR': 'DEMOCRAT',
             'TRACI KOSTER': 'REPUBLICAN',
             'JESSICA HARRINGTON': 'DEMOCRAT',
             'MIKE BELTRAN': 'REPUBLICAN',
             'SCOTT "MR H" HOTTENSTEIN': 'DEMOCRAT',
             'ANDREW LEARNED': 'DEMOCRAT',
             'MICHAEL OWEN': 'REPUBLICAN',
             'ALLISON TANT': 'DEMOCRAT',
             'JIM KALLINGER': 'REPUBLICAN',
             'BOBBY PAYNE': 'REPUBLICAN',
             'KIMBERLY DUGGER': 'DEMOCRAT',
             'ERIN GRAIL': 'REPUBLICAN',
             'ALBERT M GRIFFITHS': 'DEMOCRAT',
             'KAYLEE TUCK': 'REPUBLICAN',
             'LINDA TRIPP': 'DEMOCRAT',
             'TOBY OVERDORF': 'REPUBLICAN',
             'EDGAR "EJ" BERNIER': 'DEMOCRAT',
             'DANA TRABULSY': 'REPUBLICAN',
             'DELORES HOGAN JOHNSON': 'DEMOCRAT',
             'ALEX ANDRADE': 'REPUBLICAN',
             'CHRIS DOSEV': 'REPUBLICAN',
             'FRED "JAY" BARROWS': 'REPUBLICAN',
             'BRENDAN A ROCHE': 'DEMOCRAT',
             'CAROL A DOHERTY': 'DEMOCRAT',
             'KELLY A DOONER': 'REPUBLICAN',
             'JAMES M KELCOURSE': 'REPUBLICAN',
             'AMBER HEWETT': 'DEMOCRAT',
             'SHEILA C HARRINGTON': 'REPUBLICAN',
             'DEBORAH L BUSSER': 'DEMOCRAT',
             'SHAWN C DOOLEY': 'REPUBLICAN',
             'BRIAN P HAMLIN': 'DEMOCRAT',
             'PATRICK J KEARNEY': 'DEMOCRAT',
             'CRAIG VALDEZ': 'REPUBLICAN',
             'JOSHUA CUTLER': 'DEMOCRAT',
             'TATYANA MEDVEDEV SEMYROG': 'REPUBLICAN',
             'KATHLEEN LANATRA': 'DEMOCRAT',
             'SUMMER SCHMALING': 'REPUBLICAN',
             'JONATHAN D ZLOTNIK': 'DEMOCRAT',
             'BRUCE K CHESTER': 'REPUBLICAN',
             'NATALIE H HIGGINS': 'DEMOCRAT',
             'THOMAS F ARDINGER': 'REPUBLICAN',
             'DONALD R BERTHIAUME, JR': 'REPUBLICAN',
             'SAMUEL BIAGETTI': 'DEMOCRAT',
             'KRISTI ST LAURENT': 'DEMOCRAT',
             'HENRI AZIBERT': 'DEMOCRAT',
             'VALERIE ROMAN': 'DEMOCRAT',
             'IOANA SINGUREANU': 'DEMOCRAT',
             'JULIUS SOTI': 'REPUBLICAN',
             'ROBERT J LYNN': 'REPUBLICAN',
             'CHARLES MCMAHON': 'REPUBLICAN',
             'MARY GRIFFIN': 'REPUBLICAN',
             'STEPHANIE Z JEROME': 'DEMOCRAT',
             'CHARLES "BUTCH" SHAW': 'REPUBLICAN',
             'DAVID SOULIA': 'REPUBLICAN',
             'NELSON BROWNELL': 'DEMOCRAT',
             'KEVIN P DOERING': 'OTHER',
             'JAY HOOPER': 'DEMOCRAT',
             'PETER REED': 'OTHER',
             'JOSEPH ROCHE': 'REPUBLICAN',
             'CHARLES RUSSELL': 'REPUBLICAN',
             'LARRY SATCOWITZ': 'DEMOCRAT',
             'MAIDA F TOWNSEND': 'DEMOCRAT',
             'JOE PARSONS': 'REPUBLICAN',
             'KELSEY ROOT-WINCHESTER': 'DEMOCRAT',
             'KARI DOLAN': 'DEMOCRAT',
             'MAXINE GRAD': 'DEMOCRAT',
             'SHANNARA JOHNSON': 'REPUBLICAN',
             'TYLER MACHIA': 'REPUBLICAN',
             'AVRAM PATT': 'DEMOCRAT',
             'DAVID YACOVONE': 'DEMOCRAT',
             'EVELYN T ROBINSON': 'DEMOCRAT',
             'BILL HIXON': 'REPUBLICAN',
             'GLENN POSEY': 'REPUBLICAN',
             'JUSTIN BAMBERG': 'DEMOCRAT',
             'JORDAN PACE': 'REPUBLICAN',
             'KRYSTLE MATTHEWS': 'DEMOCRAT',
             'LIN BENNETT': 'REPUBLICAN',
             'BRAD JAYNE': 'OTHER',
             'ED SUTTON': 'DEMOCRAT'
            }
for c in party_map.keys():
    merged.loc[(merged.candidate == c) &
               (merged.office == 'STATE HOUSE'),
               'party'] = party_map[c]
ReportParties(merged, 'STATE HOUSE', LOG_FILE)

#Set all empty party values to the value NONE
merged.loc[merged.party == '', 'party'] = 'NONE'
merged.loc[pd.isna(merged.party), 'party'] = 'NONE'


#Now check the unique parties in the offices for any misspellings
for office in ['US PRESIDENT', 'US SENATE', 'US HOUSE', 'GOVERNOR',
               'STATE SENATE', 'STATE HOUSE']:
    parties = merged.loc[merged.office == office, 'party'].unique()
    parties = ['' if _ is np.nan else _ for _ in parties]
    parties = np.sort(parties)
    with open(LOG_FILE, "a+") as f:
        f.write(f"Unique parties for {office}: {parties}\n")

#Assign unique candidate IDs
merged['cand_id'] = merged['state'] + '_' +\
                    merged['office'] + '_' +\
                    merged['candidate']

merged = merged.rename(columns={'township': 'town'})


################################################################################
# Save the merged dataset
################################################################################
merged.to_csv(SAVE_DIR+"../data_for_analysis/all_audits.csv", index=False)
